diff options
472 files changed, 12817 insertions, 5488 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 1977fab38656..6de71308a906 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -361,8 +361,6 @@ telephony/ - directory with info on telephony (e.g. voice over IP) support. time_interpolators.txt - info on time interpolators. -tipar.txt - - information about Parallel link cable for Texas Instruments handhelds. tty.txt - guide to the locking policies of the tty layer. uml/ diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index 6caa14615578..1875e502f872 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -474,25 +474,29 @@ make a good program). So, you can either get rid of GNU emacs, or change it to use saner values. To do the latter, you can stick the following in your .emacs file: -(defun linux-c-mode () - "C mode with adjusted defaults for use with the Linux kernel." - (interactive) - (c-mode) - (c-set-style "K&R") - (setq tab-width 8) - (setq indent-tabs-mode t) - (setq c-basic-offset 8)) - -This will define the M-x linux-c-mode command. When hacking on a -module, if you put the string -*- linux-c -*- somewhere on the first -two lines, this mode will be automatically invoked. Also, you may want -to add - -(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode) - auto-mode-alist)) - -to your .emacs file if you want to have linux-c-mode switched on -automagically when you edit source files under /usr/src/linux. +(defun c-lineup-arglist-tabs-only (ignored) + "Line up argument lists by tabs, not spaces" + (let* ((anchor (c-langelem-pos c-syntactic-element)) + (column (c-langelem-2nd-pos c-syntactic-element)) + (offset (- (1+ column) anchor)) + (steps (floor offset c-basic-offset))) + (* (max steps 1) + c-basic-offset))) + +(add-hook 'c-mode-hook + (lambda () + (let ((filename (buffer-file-name))) + ;; Enable kernel mode for the appropriate files + (when (and filename + (string-match "~/src/linux-trees" filename)) + (setq indent-tabs-mode t) + (c-set-style "linux") + (c-set-offset 'arglist-cont-nonempty + '(c-lineup-gcc-asm-reg + c-lineup-arglist-tabs-only)))))) + +This will make emacs go better with the kernel coding style for C +files below ~/src/linux-trees. But even if you fail in getting emacs to do sane formatting, not everything is lost: use "indent". diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl index 1fd6a1ec7591..8a5dc6e021ff 100644 --- a/Documentation/DocBook/procfs-guide.tmpl +++ b/Documentation/DocBook/procfs-guide.tmpl @@ -29,12 +29,12 @@ <revhistory> <revision> - <revnumber>1.0 </revnumber> + <revnumber>1.0</revnumber> <date>May 30, 2001</date> <revremark>Initial revision posted to linux-kernel</revremark> </revision> <revision> - <revnumber>1.1 </revnumber> + <revnumber>1.1</revnumber> <date>June 3, 2001</date> <revremark>Revised after comments from linux-kernel</revremark> </revision> diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt index 1443cd71d263..8a12f0730c94 100644 --- a/Documentation/accounting/delay-accounting.txt +++ b/Documentation/accounting/delay-accounting.txt @@ -11,6 +11,7 @@ the delays experienced by a task while a) waiting for a CPU (while being runnable) b) completion of synchronous block I/O initiated by the task c) swapping in pages +d) memory reclaim and makes these statistics available to userspace through the taskstats interface. @@ -41,7 +42,7 @@ this structure. See include/linux/taskstats.h for a description of the fields pertaining to delay accounting. It will generally be in the form of counters returning the cumulative -delay seen for cpu, sync block I/O, swapin etc. +delay seen for cpu, sync block I/O, swapin, memory reclaim etc. Taking the difference of two successive readings of a given counter (say cpu_delay_total) for a task will give the delay @@ -94,7 +95,9 @@ CPU count real total virtual total delay total 7876 92005750 100000000 24001500 IO count delay total 0 0 -MEM count delay total +SWAP count delay total + 0 0 +RECLAIM count delay total 0 0 Get delays seen in executing a given simple command @@ -108,5 +111,7 @@ CPU count real total virtual total delay total 6 4000250 4000000 0 IO count delay total 0 0 -MEM count delay total +SWAP count delay total + 0 0 +RECLAIM count delay total 0 0 diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index 40121b5cca14..3f7755f3963f 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -196,14 +196,18 @@ void print_delayacct(struct taskstats *t) " %15llu%15llu%15llu%15llu\n" "IO %15s%15s\n" " %15llu%15llu\n" - "MEM %15s%15s\n" + "SWAP %15s%15s\n" + " %15llu%15llu\n" + "RECLAIM %12s%15s\n" " %15llu%15llu\n", "count", "real total", "virtual total", "delay total", t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, t->cpu_delay_total, "count", "delay total", t->blkio_count, t->blkio_delay_total, - "count", "delay total", t->swapin_count, t->swapin_delay_total); + "count", "delay total", t->swapin_count, t->swapin_delay_total, + "count", "delay total", + t->freepages_count, t->freepages_delay_total); } void task_context_switch_counts(struct taskstats *t) diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt index cd784f46bf8a..b988d110db59 100644 --- a/Documentation/accounting/taskstats-struct.txt +++ b/Documentation/accounting/taskstats-struct.txt @@ -26,6 +26,8 @@ There are three different groups of fields in the struct taskstats: 5) Time accounting for SMT machines +6) Extended delay accounting fields for memory reclaim + Future extension should add fields to the end of the taskstats struct, and should not change the relative position of each field within the struct. @@ -170,4 +172,9 @@ struct taskstats { __u64 ac_utimescaled; /* utime scaled on frequency etc */ __u64 ac_stimescaled; /* stime scaled on frequency etc */ __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ + +6) Extended delay accounting fields for memory reclaim + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; } diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt new file mode 100644 index 000000000000..d8297e4ebd26 --- /dev/null +++ b/Documentation/bt8xxgpio.txt @@ -0,0 +1,67 @@ +=============================================================== +== BT8XXGPIO driver == +== == +== A driver for a selfmade cheap BT8xx based PCI GPIO-card == +== == +== For advanced documentation, see == +== http://www.bu3sch.de/btgpio.php == +=============================================================== + + +A generic digital 24-port PCI GPIO card can be built out of an ordinary +Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The +Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily +find them used for low prices on the net. + +The bt8xx chip does have 24 digital GPIO ports. +These ports are accessible via 24 pins on the SMD chip package. + + +============================================== +== How to physically access the GPIO pins == +============================================== + +The are several ways to access these pins. One might unsolder the whole chip +and put it on a custom PCI board, or one might only unsolder each individual +GPIO pin and solder that to some tiny wire. As the chip package really is tiny +there are some advanced soldering skills needed in any case. + +The physical pinouts are drawn in the following ASCII art. +The GPIO pins are marked with G00-G23 + + G G G G G G G G G G G G G G G G G G + 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + --------------------------------------------------------------------------- + --| ^ ^ |-- + --| pin 86 pin 67 |-- + --| |-- + --| pin 61 > |-- G18 + --| |-- G19 + --| |-- G20 + --| |-- G21 + --| |-- G22 + --| pin 56 > |-- G23 + --| |-- + --| Brooktree 878/879 |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| O |-- + --| |-- + --------------------------------------------------------------------------- + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + ^ + This is pin 1 + diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 866b9cd9a959..9b53d5827361 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -242,8 +242,7 @@ rmdir() if there are no tasks. 1. Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first 3. Teach controller to account for shared-pages -4. Start reclamation when the limit is lowered -5. Start reclamation in the background when the limit is +4. Start reclamation in the background when the limit is not yet hit but the usage is getting closer Summary diff --git a/Documentation/edac.txt b/Documentation/edac.txt index a5c36842ecef..ced527388001 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt @@ -222,74 +222,9 @@ both csrow2 and csrow3 are populated, this indicates a dual ranked set of DIMMs for channels 0 and 1. -Within each of the 'mc','mcX' and 'csrowX' directories are several +Within each of the 'mcX' and 'csrowX' directories are several EDAC control and attribute files. - -============================================================================ -DIRECTORY 'mc' - -In directory 'mc' are EDAC system overall control and attribute files: - - -Panic on UE control file: - - 'edac_mc_panic_on_ue' - - An uncorrectable error will cause a machine panic. This is usually - desirable. It is a bad idea to continue when an uncorrectable error - occurs - it is indeterminate what was uncorrected and the operating - system context might be so mangled that continuing will lead to further - corruption. If the kernel has MCE configured, then EDAC will never - notice the UE. - - LOAD TIME: module/kernel parameter: panic_on_ue=[0|1] - - RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue - - -Log UE control file: - - 'edac_mc_log_ue' - - Generate kernel messages describing uncorrectable errors. These errors - are reported through the system message log system. UE statistics - will be accumulated even when UE logging is disabled. - - LOAD TIME: module/kernel parameter: log_ue=[0|1] - - RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue - - -Log CE control file: - - 'edac_mc_log_ce' - - Generate kernel messages describing correctable errors. These - errors are reported through the system message log system. - CE statistics will be accumulated even when CE logging is disabled. - - LOAD TIME: module/kernel parameter: log_ce=[0|1] - - RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce - - -Polling period control file: - - 'edac_mc_poll_msec' - - The time period, in milliseconds, for polling for error information. - Too small a value wastes resources. Too large a value might delay - necessary handling of errors and might loose valuable information for - locating the error. 1000 milliseconds (once each second) is the current - default. Systems which require all the bandwidth they can get, may - increase this. - - LOAD TIME: module/kernel parameter: poll_msec=[0|1] - - RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec - - ============================================================================ 'mcX' DIRECTORIES @@ -537,7 +472,6 @@ Channel 1 DIMM Label control file: motherboard specific and determination of this information must occur in userland at this time. - ============================================================================ SYSTEM LOGGING @@ -570,7 +504,6 @@ error type, a notice of "no info" and then an optional, driver-specific error message. - ============================================================================ PCI Bus Parity Detection @@ -604,6 +537,74 @@ Enable/Disable PCI Parity checking control file: echo "0" >/sys/devices/system/edac/pci/check_pci_parity +Parity Count: + + 'pci_parity_count' + + This attribute file will display the number of parity errors that + have been detected. + + +============================================================================ +MODULE PARAMETERS + +Panic on UE control file: + + 'edac_mc_panic_on_ue' + + An uncorrectable error will cause a machine panic. This is usually + desirable. It is a bad idea to continue when an uncorrectable error + occurs - it is indeterminate what was uncorrected and the operating + system context might be so mangled that continuing will lead to further + corruption. If the kernel has MCE configured, then EDAC will never + notice the UE. + + LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1] + + RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue + + +Log UE control file: + + 'edac_mc_log_ue' + + Generate kernel messages describing uncorrectable errors. These errors + are reported through the system message log system. UE statistics + will be accumulated even when UE logging is disabled. + + LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1] + + RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue + + +Log CE control file: + + 'edac_mc_log_ce' + + Generate kernel messages describing correctable errors. These + errors are reported through the system message log system. + CE statistics will be accumulated even when CE logging is disabled. + + LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1] + + RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce + + +Polling period control file: + + 'edac_mc_poll_msec' + + The time period, in milliseconds, for polling for error information. + Too small a value wastes resources. Too large a value might delay + necessary handling of errors and might loose valuable information for + locating the error. 1000 milliseconds (once each second) is the current + default. Systems which require all the bandwidth they can get, may + increase this. + + LOAD TIME: module/kernel parameter: edac_mc_poll_msec=[0|1] + + RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec + Panic on PCI PARITY Error: @@ -614,21 +615,13 @@ Panic on PCI PARITY Error: error has been detected. - module/kernel parameter: panic_on_pci_parity=[0|1] + module/kernel parameter: edac_panic_on_pci_pe=[0|1] Enable: - echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity + echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe Disable: - echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity - - -Parity Count: - - 'pci_parity_count' - - This attribute file will display the number of parity errors that - have been detected. + echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 09c4a1efb8e3..721c71b86e06 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -138,24 +138,6 @@ Who: Kay Sievers <kay.sievers@suse.de> --------------------------- -What: find_task_by_pid -When: 2.6.26 -Why: With pid namespaces, calling this funciton will return the - wrong task when called from inside a namespace. - - The best way to save a task pid and find a task by this - pid later, is to find this task's struct pid pointer (or get - it directly from the task) and call pid_task() later. - - If someone really needs to get a task by its pid_t, then - he most likely needs the find_task_by_vpid() to get the - task from the same namespace as the current task is in, but - this may be not so in general. - -Who: Pavel Emelyanov <xemul@openvz.org> - ---------------------------- - What: ACPI procfs interface When: July 2008 Why: ACPI sysfs conversion should be finished by January 2008. diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index 2d5e1e582e13..bbac4f1d9056 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -96,6 +96,14 @@ shortname=lower|win95|winnt|mixed emulate the Windows 95 rule for create. Default setting is `lower'. +tz=UTC -- Interpret timestamps as UTC rather than local time. + This option disables the conversion of timestamps + between local time (as used by Windows on FAT) and UTC + (which Linux uses internally). This is particuluarly + useful when mounting devices (like digital cameras) + that are set to UTC in order to avoid the pitfalls of + local time. + <bool>: 0,1,yes,no,true,false TODO diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index c35ca9e40d4c..18022e249c53 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -347,15 +347,12 @@ necessarily be nonportable. Dynamic definition of GPIOs is not currently standard; for example, as a side effect of configuring an add-on board with some GPIO expanders. -These calls are purely for kernel space, but a userspace API could be built -on top of them. - GPIO implementor's framework (OPTIONAL) ======================================= As noted earlier, there is an optional implementation framework making it easier for platforms to support different kinds of GPIO controller using -the same programming interface. +the same programming interface. This framework is called "gpiolib". As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file will be found there. That will list all the controllers registered through @@ -392,11 +389,21 @@ either NULL or the label associated with that GPIO when it was requested. Platform Support ---------------- -To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB" +To support this framework, a platform's Kconfig will "select" either +ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). They may also want to provide a custom value for ARCH_NR_GPIOS. +ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled +into the kernel on that architecture. + +ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user +can enable it and build it into the kernel optionally. + +If neither of these options are selected, the platform does not support +GPIOs through GPIO-lib and the code cannot be enabled by the user. + Trivial implementations of those functions can directly use framework code, which always dispatches through the gpio_chip: @@ -439,4 +446,120 @@ becomes available. That may mean the device should not be registered until calls for that GPIO can work. One way to address such dependencies is for such gpio_chip controllers to provide setup() and teardown() callbacks to board specific code; those board specific callbacks would register devices -once all the necessary resources are available. +once all the necessary resources are available, and remove them later when +the GPIO controller device becomes unavailable. + + +Sysfs Interface for Userspace (OPTIONAL) +======================================== +Platforms which use the "gpiolib" implementors framework may choose to +configure a sysfs user interface to GPIOs. This is different from the +debugfs interface, since it provides control over GPIO direction and +value instead of just showing a gpio state summary. Plus, it could be +present on production systems without debugging support. + +Given approprate hardware documentation for the system, userspace could +know for example that GPIO #23 controls the write protect line used to +protect boot loader segments in flash memory. System upgrade procedures +may need to temporarily remove that protection, first importing a GPIO, +then changing its output state, then updating the code before re-enabling +the write protection. In normal use, GPIO #23 would never be touched, +and the kernel would have no need to know about it. + +Again depending on appropriate hardware documentation, on some systems +userspace GPIO can be used to determine system configuration data that +standard kernels won't know about. And for some tasks, simple userspace +GPIO drivers could be all that the system really needs. + +Note that standard kernel drivers exist for common "LEDs and Buttons" +GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those +instead of talking directly to the GPIOs; they integrate with kernel +frameworks better than your userspace code could. + + +Paths in Sysfs +-------------- +There are three kinds of entry in /sys/class/gpio: + + - Control interfaces used to get userspace control over GPIOs; + + - GPIOs themselves; and + + - GPIO controllers ("gpio_chip" instances). + +That's in addition to standard files including the "device" symlink. + +The control interfaces are write-only: + + /sys/class/gpio/ + + "export" ... Userspace may ask the kernel to export control of + a GPIO to userspace by writing its number to this file. + + Example: "echo 19 > export" will create a "gpio19" node + for GPIO #19, if that's not requested by kernel code. + + "unexport" ... Reverses the effect of exporting to userspace. + + Example: "echo 19 > unexport" will remove a "gpio19" + node exported using the "export" file. + +GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) +and have the following read/write attributes: + + /sys/class/gpio/gpioN/ + + "direction" ... reads as either "in" or "out". This value may + normally be written. Writing as "out" defaults to + initializing the value as low. To ensure glitch free + operation, values "low" and "high" may be written to + configure the GPIO as an output with that initial value. + + Note that this attribute *will not exist* if the kernel + doesn't support changing the direction of a GPIO, or + it was exported by kernel code that didn't explicitly + allow userspace to reconfigure this GPIO's direction. + + "value" ... reads as either 0 (low) or 1 (high). If the GPIO + is configured as an output, this value may be written; + any nonzero value is treated as high. + +GPIO controllers have paths like /sys/class/gpio/chipchip42/ (for the +controller implementing GPIOs starting at #42) and have the following +read-only attributes: + + /sys/class/gpio/gpiochipN/ + + "base" ... same as N, the first GPIO managed by this chip + + "label" ... provided for diagnostics (not always unique) + + "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) + +Board documentation should in most cases cover what GPIOs are used for +what purposes. However, those numbers are not always stable; GPIOs on +a daughtercard might be different depending on the base board being used, +or other cards in the stack. In such cases, you may need to use the +gpiochip nodes (possibly in conjunction with schematics) to determine +the correct GPIO number to use for a given signal. + + +Exporting from Kernel code +-------------------------- +Kernel code can explicitly manage exports of GPIOs which have already been +requested using gpio_request(): + + /* export the GPIO to userspace */ + int gpio_export(unsigned gpio, bool direction_may_change); + + /* reverse gpio_export() */ + void gpio_unexport(); + +After a kernel driver requests a GPIO, it may only be made available in +the sysfs interface by gpio_export(). The driver can control whether the +signal direction may change. This helps drivers prevent userspace code +from accidentally clobbering important system state. + +This explicit exporting can help with debugging (by making some kinds +of experiments easier), or can provide an always-there interface that's +suitable for documenting as part of a board support package. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 497a98dafdaa..e7bea3e85304 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2159,13 +2159,6 @@ and is between 256 and 4096 characters. It is defined in the file <deci-seconds>: poll all this frequency 0: no polling (default) - tipar.timeout= [HW,PPT] - Set communications timeout in tenths of a second - (default 15). - - tipar.delay= [HW,PPT] - Set inter-bit delay in microseconds (default 10). - tmscsim= [HW,SCSI] See comment before function dc390_setup() in drivers/scsi/tmscsim.c. diff --git a/Documentation/moxa-smartio b/Documentation/moxa-smartio index fe24ecc6372e..5337e80a5b96 100644 --- a/Documentation/moxa-smartio +++ b/Documentation/moxa-smartio @@ -1,14 +1,22 @@ ============================================================================= - - MOXA Smartio Family Device Driver Ver 1.1 Installation Guide - for Linux Kernel 2.2.x and 2.0.3x - Copyright (C) 1999, Moxa Technologies Co, Ltd. + MOXA Smartio/Industio Family Device Driver Installation Guide + for Linux Kernel 2.4.x, 2.6.x + Copyright (C) 2008, Moxa Inc. ============================================================================= +Date: 01/21/2008 + Content 1. Introduction 2. System Requirement 3. Installation + 3.1 Hardware installation + 3.2 Driver files + 3.3 Device naming convention + 3.4 Module driver configuration + 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x. + 3.6 Custom configuration + 3.7 Verify driver installation 4. Utilities 5. Setserial 6. Troubleshooting @@ -16,27 +24,48 @@ Content ----------------------------------------------------------------------------- 1. Introduction - The Smartio family Linux driver, Ver. 1.1, supports following multiport + The Smartio/Industio/UPCI family Linux driver supports following multiport boards. - -C104P/H/HS, C104H/PCI, C104HS/PCI, CI-104J 4 port multiport board. - -C168P/H/HS, C168H/PCI 8 port multiport board. - - This driver has been modified a little and cleaned up from the Moxa - contributed driver code and merged into Linux 2.2.14pre. In particular - official major/minor numbers have been assigned which are different to - those the original Moxa supplied driver used. + - 2 ports multiport board + CP-102U, CP-102UL, CP-102UF + CP-132U-I, CP-132UL, + CP-132, CP-132I, CP132S, CP-132IS, + CI-132, CI-132I, CI-132IS, + (C102H, C102HI, C102HIS, C102P, CP-102, CP-102S) + + - 4 ports multiport board + CP-104EL, + CP-104UL, CP-104JU, + CP-134U, CP-134U-I, + C104H/PCI, C104HS/PCI, + CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL, + C104H, C104HS, + CI-104J, CI-104JS, + CI-134, CI-134I, CI-134IS, + (C114HI, CT-114I, C104P) + POS-104UL, + CB-114, + CB-134I + + - 8 ports multiport board + CP-118EL, CP-168EL, + CP-118U, CP-168U, + C168H/PCI, + C168H, C168HS, + (C168P), + CB-108 This driver and installation procedure have been developed upon Linux Kernel - 2.2.5 and backward compatible to 2.0.3x. This driver supports Intel x86 and - Alpha hardware platform. In order to maintain compatibility, this version - has also been properly tested with RedHat, OpenLinux, TurboLinux and - S.u.S.E Linux. However, if compatibility problem occurs, please contact - Moxa at support@moxa.com.tw. + 2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order + to maintain compatibility, this version has also been properly tested with + RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem + occurs, please contact Moxa at support@moxa.com.tw. In addition to device driver, useful utilities are also provided in this version. They are - - msdiag Diagnostic program for detecting installed Moxa Smartio boards. + - msdiag Diagnostic program for displaying installed Moxa + Smartio/Industio boards. - msmon Monitor program to observe data count and line status signals. - msterm A simple terminal program which is useful in testing serial ports. @@ -47,8 +76,7 @@ Content GNU General Public License in this version. Please refer to GNU General Public License announcement in each source code file for more detail. - In Moxa's ftp sites, you may always find latest driver at - ftp://ftp.moxa.com or ftp://ftp.moxa.com.tw. + In Moxa's Web sites, you may always find latest driver at http://web.moxa.com. This version of driver can be installed as Loadable Module (Module driver) or built-in into kernel (Static driver). You may refer to following @@ -61,8 +89,8 @@ Content ----------------------------------------------------------------------------- 2. System Requirement - - Hardware platform: Intel x86 or Alpha machine - - Kernel version: 2.0.3x or 2.2.x + - Hardware platform: Intel x86 machine + - Kernel version: 2.4.x or 2.6.x - gcc version 2.72 or later - Maximum 4 boards can be installed in combination @@ -70,9 +98,18 @@ Content 3. Installation 3.1 Hardware installation + 3.2 Driver files + 3.3 Device naming convention + 3.4 Module driver configuration + 3.5 Static driver configuration for Linux kernel 2.4.x, 2.6.x. + 3.6 Custom configuration + 3.7 Verify driver installation + + + 3.1 Hardware installation - There are two types of buses, ISA and PCI, for Smartio family multiport - board. + There are two types of buses, ISA and PCI, for Smartio/Industio + family multiport board. ISA board --------- @@ -81,47 +118,57 @@ Content installation procedure in User's Manual before proceed any further. Please make sure the JP1 is open after the ISA board is set properly. - PCI board - --------- + PCI/UPCI board + -------------- You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict with other ISA devices. Please refer to hardware installation procedure in User's Manual in advance. - IRQ Sharing + PCI IRQ Sharing ----------- Each port within the same multiport board shares the same IRQ. Up to - 4 Moxa Smartio Family multiport boards can be installed together on - one system and they can share the same IRQ. + 4 Moxa Smartio/Industio PCI Family multiport boards can be installed + together on one system and they can share the same IRQ. + - 3.2 Driver files and device naming convention + 3.2 Driver files The driver file may be obtained from ftp, CD-ROM or floppy disk. The first step, anyway, is to copy driver file "mxser.tgz" into specified directory. e.g. /moxa. The execute commands as below. + # cd / + # mkdir moxa # cd /moxa - # tar xvf /dev/fd0 + # tar xvf /dev/fd0 + or + + # cd / + # mkdir moxa # cd /moxa # cp /mnt/cdrom/<driver directory>/mxser.tgz . # tar xvfz mxser.tgz + + 3.3 Device naming convention + You may find all the driver and utilities files in /moxa/mxser. Following installation procedure depends on the model you'd like to - run the driver. If you prefer module driver, please refer to 3.3. - If static driver is required, please refer to 3.4. + run the driver. If you prefer module driver, please refer to 3.4. + If static driver is required, please refer to 3.5. Dialin and callout port ----------------------- - This driver remains traditional serial device properties. There're + This driver remains traditional serial device properties. There are two special file name for each serial port. One is dial-in port which is named "ttyMxx". For callout port, the naming convention is "cumxx". Device naming when more than 2 boards installed ----------------------------------------------- - Naming convention for each Smartio multiport board is pre-defined - as below. + Naming convention for each Smartio/Industio multiport board is + pre-defined as below. Board Num. Dial-in Port Callout port 1st board ttyM0 - ttyM7 cum0 - cum7 @@ -129,6 +176,12 @@ Content 3rd board ttyM16 - ttyM23 cum16 - cum23 4th board ttyM24 - ttym31 cum24 - cum31 + + !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + Under Kernel 2.6 the cum Device is Obsolete. So use ttyM* + device instead. + !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + Board sequence -------------- This driver will activate ISA boards according to the parameter set @@ -138,69 +191,131 @@ Content For PCI boards, their sequence will be after ISA boards and C168H/PCI has higher priority than C104H/PCI boards. - 3.3 Module driver configuration + 3.4 Module driver configuration Module driver is easiest way to install. If you prefer static driver installation, please skip this paragraph. - 1. Find "Makefile" in /moxa/mxser, then run - # make install + + ------------- Prepare to use the MOXA driver-------------------- + 3.4.1 Create tty device with correct major number + Before using MOXA driver, your system must have the tty devices + which are created with driver's major number. We offer one shell + script "msmknod" to simplify the procedure. + This step is only needed to be executed once. But you still + need to do this procedure when: + a. You change the driver's major number. Please refer the "3.7" + section. + b. Your total installed MOXA boards number is changed. Maybe you + add/delete one MOXA board. + c. You want to change the tty name. This needs to modify the + shell script "msmknod" + + The procedure is: + # cd /moxa/mxser/driver + # ./msmknod + + This shell script will require the major number for dial-in + device and callout device to create tty device. You also need + to specify the total installed MOXA board number. Default major + numbers for dial-in device and callout device are 30, 35. If + you need to change to other number, please refer section "3.7" + for more detailed procedure. + Msmknod will delete any special files occupying the same device + naming. + + 3.4.2 Build the MOXA driver and utilities + Before using the MOXA driver and utilities, you need compile the + all the source code. This step is only need to be executed once. + But you still re-compile the source code if you modify the source + code. For example, if you change the driver's major number (see + "3.7" section), then you need to do this step again. + + Find "Makefile" in /moxa/mxser, then run + + # make clean; make install + + !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!! + For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1: + # make clean; make installsp1 + + For Red Hat Enterprise Linux AS4/ES4/WS4: + # make clean; make installsp2 + !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!! The driver files "mxser.o" and utilities will be properly compiled - and copied to system directories respectively.Then run + and copied to system directories respectively. - # insmod mxser + ------------- Load MOXA driver-------------------- + 3.4.3 Load the MOXA driver - to activate the modular driver. You may run "lsmod" to check - if "mxser.o" is activated. + # modprobe mxser <argument> - 2. Create special files by executing "msmknod". - # cd /moxa/mxser/driver - # ./msmknod + will activate the module driver. You may run "lsmod" to check + if "mxser" is activated. If the MOXA board is ISA board, the + <argument> is needed. Please refer to section "3.4.5" for more + information. + + + ------------- Load MOXA driver on boot -------------------- + 3.4.4 For the above description, you may manually execute + "modprobe mxser" to activate this driver and run + "rmmod mxser" to remove it. + However, it's better to have a boot time configuration to + eliminate manual operation. Boot time configuration can be + achieved by rc file. We offer one "rc.mxser" file to simplify + the procedure under "moxa/mxser/driver". - Default major numbers for dial-in device and callout device are - 174, 175. Msmknod will delete any special files occupying the same - device naming. + But if you use ISA board, please modify the "modprobe ..." command + to add the argument (see "3.4.5" section). After modifying the + rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser" + manually to make sure the modification is ok. If any error + encountered, please try to modify again. If the modification is + completed, follow the below step. - 3. Up to now, you may manually execute "insmod mxser" to activate - this driver and run "rmmod mxser" to remove it. However, it's - better to have a boot time configuration to eliminate manual - operation. - Boot time configuration can be achieved by rc file. Run following - command for setting rc files. + Run following command for setting rc files. # cd /moxa/mxser/driver # cp ./rc.mxser /etc/rc.d # cd /etc/rc.d - You may have to modify part of the content in rc.mxser to specify - parameters for ISA board. Please refer to rc.mxser for more detail. - Find "rc.serial". If "rc.serial" doesn't exist, create it by vi. - Add "rc.mxser" in last line. Next, open rc.local by vi - and append following content. + Check "rc.serial" is existed or not. If "rc.serial" doesn't exist, + create it by vi, run "chmod 755 rc.serial" to change the permission. + Add "/etc/rc.d/rc.mxser" in last line, - if [ -f /etc/rc.d/rc.serial ]; then - sh /etc/rc.d/rc.serial - fi + Reboot and check if moxa.o activated by "lsmod" command. - 4. Reboot and check if mxser.o activated by "lsmod" command. - 5. If you'd like to drive Smartio ISA boards in the system, you'll - have to add parameter to specify CAP address of given board while - activating "mxser.o". The format for parameters are as follows. + 3.4.5. If you'd like to drive Smartio/Industio ISA boards in the system, + you'll have to add parameter to specify CAP address of given + board while activating "mxser.o". The format for parameters are + as follows. - insmod mxser ioaddr=0x???,0x???,0x???,0x??? + modprobe mxser ioaddr=0x???,0x???,0x???,0x??? | | | | | | | +- 4th ISA board | | +------ 3rd ISA board | +------------ 2nd ISA board +------------------- 1st ISA board - 3.4 Static driver configuration + 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x + + Note: To use static driver, you must install the linux kernel + source package. + + 3.5.1 Backup the built-in driver in the kernel. + # cd /usr/src/linux/drivers/char + # mv mxser.c mxser.c.old + + For Red Hat 7.x user, you need to create link: + # cd /usr/src + # ln -s linux-2.4 linux - 1. Create link + 3.5.2 Create link # cd /usr/src/linux/drivers/char # ln -s /moxa/mxser/driver/mxser.c mxser.c - 2. Add CAP address list for ISA boards + 3.5.3 Add CAP address list for ISA boards. For PCI boards user, + please skip this step. + In module mode, the CAP address for ISA board is given by parameter. In static driver configuration, you'll have to assign it within driver's source code. If you will not @@ -222,73 +337,55 @@ Content static int mxserBoardCAP[] = {0x280, 0x180, 0x00, 0x00}; - 3. Modify tty_io.c - # cd /usr/src/linux/drivers/char/ - # vi tty_io.c - Find pty_init(), insert "mxser_init()" as + 3.5.4 Setup kernel configuration - pty_init(); - mxser_init(); + Configure the kernel: - 4. Modify tty.h - # cd /usr/src/linux/include/linux - # vi tty.h - Find extern int tty_init(void), insert "mxser_init()" as + # cd /usr/src/linux + # make menuconfig - extern int tty_init(void); - extern int mxser_init(void); - - 5. Modify Makefile - # cd /usr/src/linux/drivers/char - # vi Makefile - Find L_OBJS := tty_io.o ...... random.o, add - "mxser.o" at last of this line as - L_OBJS := tty_io.o ....... mxser.o + You will go into a menu-driven system. Please select [Character + devices][Non-standard serial port support], enable the [Moxa + SmartIO support] driver with "[*]" for built-in (not "[M]"), then + select [Exit] to exit this program. - 6. Rebuild kernel - The following are for Linux kernel rebuilding,for your reference only. + 3.5.5 Rebuild kernel + The following are for Linux kernel rebuilding, for your + reference only. For appropriate details, please refer to the Linux document. - If 'lilo' utility is installed, please use 'make zlilo' to rebuild - kernel. If 'lilo' is not installed, please follow the following steps. - a. cd /usr/src/linux - b. make clean /* take a few minutes */ - c. make bzImage /* take probably 10-20 minutes */ - d. Backup original boot kernel. /* optional step */ - e. cp /usr/src/linux/arch/i386/boot/bzImage /boot/vmlinuz + b. make clean /* take a few minutes */ + c. make dep /* take a few minutes */ + d. make bzImage /* take probably 10-20 minutes */ + e. make install /* copy boot image to correct position */ f. Please make sure the boot kernel (vmlinuz) is in the - correct position. If you use 'lilo' utility, you should - check /etc/lilo.conf 'image' item specified the path - which is the 'vmlinuz' path, or you will load wrong - (or old) boot kernel image (vmlinuz). - g. chmod 400 /vmlinuz - h. lilo - i. rdev -R /vmlinuz 1 - j. sync - - Note that if the result of "make zImage" is ERROR, then you have to - go back to Linux configuration Setup. Type "make config" in directory - /usr/src/linux or "setup". - - Since system include file, /usr/src/linux/include/linux/interrupt.h, - is modified each time the MOXA driver is installed, kernel rebuilding - is inevitable. And it takes about 10 to 20 minutes depends on the - machine. - - 7. Make utility - # cd /moxa/mxser/utility - # make install - - 8. Make special file + correct position. + g. If you use 'lilo' utility, you should check /etc/lilo.conf + 'image' item specified the path which is the 'vmlinuz' path, + or you will load wrong (or old) boot kernel image (vmlinuz). + After checking /etc/lilo.conf, please run "lilo". + + Note that if the result of "make bzImage" is ERROR, then you have to + go back to Linux configuration Setup. Type "make menuconfig" in + directory /usr/src/linux. + + + 3.5.6 Make tty device and special file # cd /moxa/mxser/driver # ./msmknod - 9. Reboot + 3.5.7 Make utility + # cd /moxa/mxser/utility + # make clean; make install + + 3.5.8 Reboot - 3.5 Custom configuration + + + 3.6 Custom configuration Although this driver already provides you default configuration, you - still can change the device name and major number.The instruction to + still can change the device name and major number. The instruction to change these parameters are shown as below. Change Device name @@ -306,33 +403,37 @@ Content 2 free major numbers for this driver. There are 3 steps to change major numbers. - 1. Find free major numbers + 3.6.1 Find free major numbers In /proc/devices, you may find all the major numbers occupied in the system. Please select 2 major numbers that are available. e.g. 40, 45. - 2. Create special files + 3.6.2 Create special files Run /moxa/mxser/driver/msmknod to create special files with specified major numbers. - 3. Modify driver with new major number + 3.6.3 Modify driver with new major number Run vi to open /moxa/mxser/driver/mxser.c. Locate the line contains "MXSERMAJOR". Change the content as below. #define MXSERMAJOR 40 #define MXSERCUMAJOR 45 - 4. Run # make install in /moxa/mxser/driver. + 3.6.4 Run "make clean; make install" in /moxa/mxser/driver. - 3.6 Verify driver installation + 3.7 Verify driver installation You may refer to /var/log/messages to check the latest status log reported by this driver whenever it's activated. + ----------------------------------------------------------------------------- 4. Utilities There are 3 utilities contained in this driver. They are msdiag, msmon and msterm. These 3 utilities are released in form of source code. They should be compiled into executable file and copied into /usr/bin. + Before using these utilities, please load driver (refer 3.4 & 3.5) and + make sure you had run the "msmknod" utility. + msdiag - Diagnostic -------------------- - This utility provides the function to detect what Moxa Smartio multiport - board exists in the system. + This utility provides the function to display what Moxa Smartio/Industio + board found by driver in the system. msmon - Port Monitoring ----------------------- @@ -353,12 +454,13 @@ Content application, for example, sending AT command to a modem connected to the port or used as a terminal for login purpose. Note that this is only a dumb terminal emulation without handling full screen operation. + ----------------------------------------------------------------------------- 5. Setserial Supported Setserial parameters are listed as below. - uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO) + uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO) close_delay set the amount of time(in 1/100 of a second) that DTR should be kept low while being closed. closing_wait set the amount of time(in 1/100 of a second) that the @@ -366,7 +468,13 @@ Content being closed, before the receiver is disable. spd_hi Use 57.6kb when the application requests 38.4kb. spd_vhi Use 115.2kb when the application requests 38.4kb. + spd_shi Use 230.4kb when the application requests 38.4kb. + spd_warp Use 460.8kb when the application requests 38.4kb. spd_normal Use 38.4kb when the application requests 38.4kb. + spd_cust Use the custom divisor to set the speed when the + application requests 38.4kb. + divisor This option set the custom divison. + baud_base This option set the base baud rate. ----------------------------------------------------------------------------- 6. Troubleshooting @@ -375,8 +483,9 @@ Content possible. If all the possible solutions fail, please contact our technical support team to get more help. - Error msg: More than 4 Moxa Smartio family boards found. Fifth board and - after are ignored. + + Error msg: More than 4 Moxa Smartio/Industio family boards found. Fifth board + and after are ignored. Solution: To avoid this problem, please unplug fifth and after board, because Moxa driver supports up to 4 boards. @@ -384,7 +493,7 @@ Content Error msg: Request_irq fail, IRQ(?) may be conflict with another device. Solution: Other PCI or ISA devices occupy the assigned IRQ. If you are not sure - which device causes the situation,please check /proc/interrupts to find + which device causes the situation, please check /proc/interrupts to find free IRQ and simply change another free IRQ for Moxa board. Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid. @@ -397,15 +506,18 @@ Content Moxa ISA board needs an interrupt vector.Please refer to user's manual "Hardware Installation" chapter to set interrupt vector. - Error msg: Couldn't install MOXA Smartio family driver! + Error msg: Couldn't install MOXA Smartio/Industio family driver! Solution: Load Moxa driver fail, the major number may conflict with other devices. - Please refer to previous section 3.5 to change a free major number for + Please refer to previous section 3.7 to change a free major number for Moxa driver. - Error msg: Couldn't install MOXA Smartio family callout driver! + Error msg: Couldn't install MOXA Smartio/Industio family callout driver! Solution: Load Moxa callout driver fail, the callout device major number may - conflict with other devices. Please refer to previous section 3.5 to + conflict with other devices. Please refer to previous section 3.7 to change a free callout device major number for Moxa driver. + + ----------------------------------------------------------------------------- + diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index ea1b70b35793..99514ced82c5 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -59,6 +59,7 @@ Table of Contents p) Freescale Synchronous Serial Interface q) USB EHCI controllers r) MDIO on GPIOs + s) SPI busses VII - Marvell Discovery mv64[345]6x System Controller chips 1) The /system-controller node @@ -1883,6 +1884,62 @@ platforms are moved over to use the flattened-device-tree model. &qe_pio_c 6>; }; + s) SPI (Serial Peripheral Interface) busses + + SPI busses can be described with a node for the SPI master device + and a set of child nodes for each SPI slave on the bus. For this + discussion, it is assumed that the system's SPI controller is in + SPI master mode. This binding does not describe SPI controllers + in slave mode. + + The SPI master node requires the following properties: + - #address-cells - number of cells required to define a chip select + address on the SPI bus. + - #size-cells - should be zero. + - compatible - name of SPI bus controller following generic names + recommended practice. + No other properties are required in the SPI bus node. It is assumed + that a driver for an SPI bus device will understand that it is an SPI bus. + However, the binding does not attempt to define the specific method for + assigning chip select numbers. Since SPI chip select configuration is + flexible and non-standardized, it is left out of this binding with the + assumption that board specific platform code will be used to manage + chip selects. Individual drivers can define additional properties to + support describing the chip select layout. + + SPI slave nodes must be children of the SPI master node and can + contain the following properties. + - reg - (required) chip select address of device. + - compatible - (required) name of SPI device following generic names + recommended practice + - spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz + - spi-cpol - (optional) Empty property indicating device requires + inverse clock polarity (CPOL) mode + - spi-cpha - (optional) Empty property indicating device requires + shifted clock phase (CPHA) mode + + SPI example for an MPC5200 SPI bus: + spi@f00 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi"; + reg = <0xf00 0x20>; + interrupts = <2 13 0 2 14 0>; + interrupt-parent = <&mpc5200_pic>; + + ethernet-switch@0 { + compatible = "micrel,ks8995m"; + spi-max-frequency = <1000000>; + reg = <0>; + }; + + codec@1 { + compatible = "ti,tlv320aic26"; + spi-max-frequency = <100000>; + reg = <1>; + }; + }; + VII - Marvell Discovery mv64[345]6x System Controller chips =========================================================== diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt index b0472ac5226a..f866c72291bf 100644 --- a/Documentation/unaligned-memory-access.txt +++ b/Documentation/unaligned-memory-access.txt @@ -218,9 +218,35 @@ If use of such macros is not convenient, another option is to use memcpy(), where the source or destination (or both) are of type u8* or unsigned char*. Due to the byte-wise nature of this operation, unaligned accesses are avoided. + +Alignment vs. Networking +======================== + +On architectures that require aligned loads, networking requires that the IP +header is aligned on a four-byte boundary to optimise the IP stack. For +regular ethernet hardware, the constant NET_IP_ALIGN is used. On most +architectures this constant has the value 2 because the normal ethernet +header is 14 bytes long, so in order to get proper alignment one needs to +DMA to an address which can be expressed as 4*n + 2. One notable exception +here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned +addresses can be very expensive and dwarf the cost of unaligned loads. + +For some ethernet hardware that cannot DMA to unaligned addresses like +4*n+2 or non-ethernet hardware, this can be a problem, and it is then +required to copy the incoming frame into an aligned buffer. Because this is +unnecessary on architectures that can do unaligned accesses, the code can be +made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so: + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + skb = original skb +#else + skb = copy skb +#endif + -- -Author: Daniel Drake <dsd@gentoo.org> +Authors: Daniel Drake <dsd@gentoo.org>, + Johannes Berg <johannes@sipsolutions.net> With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt, -Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, -Uli Kunitz, Vadim Lobanov +Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz, +Vadim Lobanov diff --git a/MAINTAINERS b/MAINTAINERS index be05ef9b7b42..4cbf6016a9b9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1043,6 +1043,12 @@ M: fujita.tomonori@lab.ntt.co.jp L: linux-scsi@vger.kernel.org S: Supported +BT8XXGPIO DRIVER +P: Michael Buesch +M: mb@bu3sch.de +W: http://bu3sch.de/btgpio.php +S: Maintained + BTTV VIDEO4LINUX DRIVER P: Mauro Carvalho Chehab M: mchehab@infradead.org diff --git a/arch/Kconfig b/arch/Kconfig index 6093c0be58b0..b0fabfa864ff 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -27,6 +27,25 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config HAVE_EFFICIENT_UNALIGNED_ACCESS + def_bool n + help + Some architectures are unable to perform unaligned accesses + without the use of get_unaligned/put_unaligned. Others are + unable to perform such accesses efficiently (e.g. trap on + unaligned access and require fixing it up in the exception + handler.) + + This symbol should be selected by an architecture if it can + perform unaligned accesses efficiently to allow different + code paths to be selected for these cases. Some network + drivers, for example, could opt to not fix up alignment + problems with received packets if doing so would not help + much. + + See Documentation/unaligned-memory-access.txt for more + information on the topic of unaligned memory accesses. + config KRETPROBES def_bool y depends on KPROBES && HAVE_KRETPROBES diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c index c00646b25f6e..3047a1b3a517 100644 --- a/arch/alpha/boot/misc.c +++ b/arch/alpha/boot/misc.c @@ -78,8 +78,6 @@ static unsigned outcnt; /* bytes in output buffer */ static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); static char *input_data; static int input_data_size; @@ -88,51 +86,18 @@ static uch *output_data; static ulg output_ptr; static ulg bytes_out; -static void *malloc(int size); -static void free(void *where); static void error(char *m); static void gzip_mark(void **); static void gzip_release(void **); extern int end; static ulg free_mem_ptr; -static ulg free_mem_ptr_end; +static ulg free_mem_end_ptr; #define HEAP_SIZE 0x3000 #include "../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size <0) error("Malloc error"); - if (free_mem_ptr <= 0) error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_ptr_end) - error("Out of memory"); - return p; -} - -static void free(void *where) -{ /* gzip_mark & gzip_release do the free */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (long) *ptr; -} - /* =========================================================================== * Fill the input buffer. This is called only when the buffer is empty * and at least one byte is really needed. @@ -193,7 +158,7 @@ decompress_kernel(void *output_start, /* FIXME FIXME FIXME */ free_mem_ptr = (ulg)output_start + ksize; - free_mem_ptr_end = (ulg)output_start + ksize + 0x200000; + free_mem_end_ptr = (ulg)output_start + ksize + 0x200000; /* FIXME FIXME FIXME */ /* put in temp area to reduce initial footprint */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6fb4f03369f2..dabb015aa40b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -268,7 +268,7 @@ config ARCH_EP93XX select GENERIC_GPIO select HAVE_CLK select HAVE_CLK - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB help This enables support for the Cirrus EP93xx series of CPUs. @@ -447,7 +447,7 @@ config ARCH_PXA select ARCH_MTD_XIP select GENERIC_GPIO select HAVE_CLK - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB select GENERIC_TIME select GENERIC_CLOCKEVENTS select TICK_ONESHOT @@ -479,7 +479,7 @@ config ARCH_SA1100 select GENERIC_CLOCKEVENTS select HAVE_CLK select TICK_ONESHOT - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB help Support for StrongARM 11x0 based boards. @@ -522,7 +522,7 @@ config ARCH_OMAP bool "TI OMAP" select GENERIC_GPIO select HAVE_CLK - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB select GENERIC_TIME select GENERIC_CLOCKEVENTS help diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index 9b444022cb9b..7145cc7c04f0 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c @@ -217,8 +217,6 @@ static unsigned outcnt; /* bytes in output buffer */ static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); extern char input_data[]; extern char input_data_end[]; @@ -227,64 +225,21 @@ static uch *output_data; static ulg output_ptr; static ulg bytes_out; -static void *malloc(int size); -static void free(void *where); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); static void putstr(const char *); extern int end; static ulg free_mem_ptr; -static ulg free_mem_ptr_end; +static ulg free_mem_end_ptr; -#define HEAP_SIZE 0x3000 - -#include "../../../../lib/inflate.c" - -#ifndef STANDALONE_DEBUG -static void *malloc(int size) -{ - void *p; - - if (size <0) error("Malloc error"); - if (free_mem_ptr <= 0) error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_ptr_end) - error("Out of memory"); - return p; -} - -static void free(void *where) -{ /* gzip_mark & gzip_release do the free */ -} - -static void gzip_mark(void **ptr) -{ - arch_decomp_wdog(); - *ptr = (void *) free_mem_ptr; -} +#ifdef STANDALONE_DEBUG +#define NO_INFLATE_MALLOC +#endif -static void gzip_release(void **ptr) -{ - arch_decomp_wdog(); - free_mem_ptr = (long) *ptr; -} -#else -static void gzip_mark(void **ptr) -{ -} +#define ARCH_HAS_DECOMP_WDOG -static void gzip_release(void **ptr) -{ -} -#endif +#include "../../../../lib/inflate.c" /* =========================================================================== * Fill the input buffer. This is called only when the buffer is empty @@ -348,7 +303,7 @@ decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p, { output_data = (uch *)output_start; /* Points to kernel start */ free_mem_ptr = free_mem_ptr_p; - free_mem_ptr_end = free_mem_ptr_end_p; + free_mem_end_ptr = free_mem_ptr_end_p; __machine_arch_type = arch_id; arch_decomp_setup(); diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 5ee39e10c8d1..d28513f14d05 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) } kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); @@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) return (void *)orig_ret_address; } -/* Called with kretprobe_lock held. */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index 1903a3491ee9..d8e9c2c3f0f6 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -1488,6 +1488,9 @@ static int __init _omap_gpio_init(void) bank->chip.set = gpio_set; if (bank_is_mpuio(bank)) { bank->chip.label = "mpuio"; +#ifdef CONFIG_ARCH_OMAP1 + bank->chip.dev = &omap_mpuio_device.dev; +#endif bank->chip.base = OMAP_MPUIO(0); } else { bank->chip.label = "gpio"; diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index df4adefedb42..7c239a916275 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -88,7 +88,7 @@ config PLATFORM_AT32AP select SUBARCH_AVR32B select MMU select PERFORMANCE_COUNTERS - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB select GENERIC_ALLOCATOR # diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index 60da03ba7117..296294f8ed81 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -360,6 +360,8 @@ static int __init pio_probe(struct platform_device *pdev) pio->chip.label = pio->name; pio->chip.base = pdev->id * 32; pio->chip.ngpio = 32; + pio->chip.dev = &pdev->dev; + pio->chip.owner = THIS_MODULE; pio->chip.direction_input = direction_input; pio->chip.get = gpio_get; diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c index 18e13bce1400..d933c89889db 100644 --- a/arch/cris/arch-v10/boot/compressed/misc.c +++ b/arch/cris/arch-v10/boot/compressed/misc.c @@ -102,50 +102,16 @@ extern char *input_data; /* lives in head.S */ static long bytes_out = 0; static uch *output_data; static unsigned long output_ptr = 0; - -static void *malloc(int size); -static void free(void *where); -static void gzip_mark(void **); -static void gzip_release(void **); - static void puts(const char *); /* the "heap" is put directly after the BSS ends, at end */ extern int _end; static long free_mem_ptr = (long)&_end; +static long free_mem_end_ptr; #include "../../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size < 0) - error("Malloc error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (long) *ptr; -} - /* decompressor info and error messages to serial console */ static void diff --git a/arch/cris/arch-v32/boot/compressed/misc.c b/arch/cris/arch-v32/boot/compressed/misc.c index 55b2695c5d70..3595e16e82bc 100644 --- a/arch/cris/arch-v32/boot/compressed/misc.c +++ b/arch/cris/arch-v32/boot/compressed/misc.c @@ -89,20 +89,14 @@ static unsigned outcnt = 0; /* bytes in output buffer */ static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); extern char *input_data; /* lives in head.S */ -static long bytes_out = 0; +static long bytes_out; static uch *output_data; -static unsigned long output_ptr = 0; +static unsigned long output_ptr; -static void *malloc(int size); -static void free(void *where); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); static void puts(const char *); @@ -110,37 +104,10 @@ static void puts(const char *); extern int _end; static long free_mem_ptr = (long)&_end; +static long free_mem_end_ptr; #include "../../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size <0) error("Malloc error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (long) *ptr; -} - /* decompressor info and error messages to serial console */ static inline void diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 085dc6ec152b..396ab059efa3 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -203,20 +203,6 @@ config UNIX98_PTYS Read the instructions in <file:Documentation/Changes> pertaining to pseudo terminals. It's safe to say N. -config UNIX98_PTY_COUNT - int "Maximum number of Unix98 PTYs in use (0-2048)" - depends on UNIX98_PTYS - default "256" - help - The maximum number of Unix98 PTYs that can be used at any one time. - The default is 256, and should be enough for desktop systems. Server - machines which support incoming telnet/rlogin/ssh connections and/or - serve several X terminals may want to increase this: every incoming - connection and every xterm uses up one PTY. - - When not in use, each additional set of 256 PTYs occupy - approximately 8 KB of kernel memory on 32-bit architectures. - source "drivers/char/pcmcia/Kconfig" source "drivers/serial/Kconfig" diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c index 845074588af0..51ab6cbd030f 100644 --- a/arch/h8300/boot/compressed/misc.c +++ b/arch/h8300/boot/compressed/misc.c @@ -67,8 +67,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */ static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); extern char input_data[]; extern int input_len; @@ -77,11 +75,7 @@ static long bytes_out = 0; static uch *output_data; static unsigned long output_ptr = 0; -static void *malloc(int size); -static void free(void *where); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); int puts(const char *); @@ -98,38 +92,6 @@ static unsigned long free_mem_end_ptr; #define TDR *((volatile unsigned char *)0xffff8b) #define SSR *((volatile unsigned char *)0xffff8c) -static void *malloc(int size) -{ - void *p; - - if (size <0) error("Malloc error"); - if (free_mem_ptr == 0) error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("Out of memory"); - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (long) *ptr; -} - int puts(const char *s) { return 0; diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 233434f4f88f..f07688da947c 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) ((struct fnptr *)kretprobe_trampoline)->ip; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { @@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c index 600d40e33495..d394292498c0 100644 --- a/arch/m32r/boot/compressed/misc.c +++ b/arch/m32r/boot/compressed/misc.c @@ -70,8 +70,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */ static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); static unsigned char *input_data; static int input_len; @@ -82,9 +80,6 @@ static unsigned long output_ptr = 0; #include "m32r_sio.c" -static void *malloc(int size); -static void free(void *where); - static unsigned long free_mem_ptr; static unsigned long free_mem_end_ptr; @@ -92,38 +87,6 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size <0) error("Malloc error"); - if (free_mem_ptr == 0) error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("Out of memory"); - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (long) *ptr; -} - void* memset(void* s, int c, size_t n) { int i; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b9c754f4070c..b4c4eaa5dd26 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -713,7 +713,7 @@ config CSRC_SB1250 config GPIO_TXX9 select GENERIC_GPIO - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB bool config CFE diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index c266211ed653..2fefb14414b7 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -11,7 +11,6 @@ #include <linux/file.h> #include <linux/smp_lock.h> #include <linux/highuid.h> -#include <linux/dirent.h> #include <linux/resource.h> #include <linux/highmem.h> #include <linux/time.h> diff --git a/arch/mn10300/boot/compressed/misc.c b/arch/mn10300/boot/compressed/misc.c index ded207efc97a..f673383518e4 100644 --- a/arch/mn10300/boot/compressed/misc.c +++ b/arch/mn10300/boot/compressed/misc.c @@ -153,26 +153,9 @@ static uch *output_data; static unsigned long output_ptr; -static void *malloc(int size); - -static inline void free(void *where) -{ /* Don't care */ -} - static unsigned long free_mem_ptr = (unsigned long) &end; static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000; -static inline void gzip_mark(void **ptr) -{ - kputs("."); - *ptr = (void *) free_mem_ptr; -} - -static inline void gzip_release(void **ptr) -{ - free_mem_ptr = (unsigned long) *ptr; -} - #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_END 0x90000 @@ -186,26 +169,6 @@ static int lines, cols; #include "../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size < 0) - error("Malloc error\n"); - if (!free_mem_ptr) - error("Memory error\n"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *) free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("\nOut of memory\n"); - - return p; -} - static inline void scroll(void) { int i; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a487671c282f..fe88418167c5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -110,8 +110,10 @@ config PPC default y select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE + select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_IDE select HAVE_IOREMAP_PROT + select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_KPROBES select HAVE_ARCH_KGDB select HAVE_KRETPROBES diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b936a1dd0a50..25a052c16754 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -23,6 +23,9 @@ struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); +/* The platform string corresponding to the real PVR */ +const char *powerpc_base_platform; + /* NOTE: * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's * the responsibility of the appropriate CPU save/restore functions to @@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) } else *t = *s; *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; + + /* + * Set the base platform string once; assumes + * we're called with real pvr first. + */ + if (powerpc_base_platform == NULL) + powerpc_base_platform = t->platform; + #if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE) /* ppc64 and booke expect identify_cpu to also call * setup_cpu for that processor. I will consolidate diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index da52269aec1e..81c8324a4a3c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -148,7 +148,7 @@ transfer_to_handler: /* Check to see if the dbcr0 register is set up to debug. Use the internal debug mode bit to do this. */ lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h + andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ li r12,-1 /* clear all pending debug events */ @@ -292,7 +292,7 @@ syscall_exit_cont: /* If the process has its own DBCR0 value, load it up. The internal debug mode bit tells us that dbcr0 should be loaded. */ lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h + andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h bnel- load_dbcr0 #endif #ifdef CONFIG_44x @@ -720,7 +720,7 @@ restore_user: /* Check whether this process has its own DBCR0 value. The internal debug mode bit tells us that dbcr0 should be loaded. */ lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h + andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h bnel- load_dbcr0 #endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 2385f68c1751..550a19399bfa 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -49,6 +49,8 @@ static int novmerge = 1; static int protect4gb = 1; +static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); + static inline unsigned long iommu_num_pages(unsigned long vaddr, unsigned long slen) { @@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, { unsigned long entry, flags; dma_addr_t ret = DMA_ERROR_CODE; + int build_fail; spin_lock_irqsave(&(tbl->it_lock), flags); @@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ /* Put the TCEs in the HW table */ - ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, - direction, attrs); + build_fail = ppc_md.tce_build(tbl, entry, npages, + (unsigned long)page & IOMMU_PAGE_MASK, + direction, attrs); + + /* ppc_md.tce_build() only returns non-zero for transient errors. + * Clean up the table bitmap in this case and return + * DMA_ERROR_CODE. For all other errors the functionality is + * not altered. + */ + if (unlikely(build_fail)) { + __iommu_free(tbl, ret, npages); + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } /* Flush/invalidate TLB caches if necessary */ if (ppc_md.tce_flush) @@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, dma_addr_t dma_next = 0, dma_addr; unsigned long flags; struct scatterlist *s, *outs, *segstart; - int outcount, incount, i; + int outcount, incount, i, build_fail = 0; unsigned int align; unsigned long handle; unsigned int max_seg_size; @@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages, entry, dma_addr); /* Insert into HW table */ - ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, - direction, attrs); + build_fail = ppc_md.tce_build(tbl, entry, npages, + vaddr & IOMMU_PAGE_MASK, + direction, attrs); + if(unlikely(build_fail)) + goto failure; /* If we are in an open segment, try merging */ if (segstart != s) { diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 4ba2af125450..de79915452c8 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_msr = regs->msr; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->nip = orig_ret_address; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 827a5726a035..9f856a0c3e38 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -34,8 +34,9 @@ #include <asm/time.h> #include <asm/prom.h> #include <asm/vdso_datapage.h> +#include <asm/vio.h> -#define MODULE_VERS "1.7" +#define MODULE_VERS "1.8" #define MODULE_NAME "lparcfg" /* #define LPARCFG_DEBUG */ @@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ -static void log_plpar_hcall_return(unsigned long rc, char *tag) +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) { - switch(rc) { - case 0: - return; - case H_HARDWARE: - printk(KERN_INFO "plpar-hcall (%s) " - "Hardware fault\n", tag); - return; - case H_FUNCTION: - printk(KERN_INFO "plpar-hcall (%s) " - "Function not allowed\n", tag); - return; - case H_AUTHORITY: - printk(KERN_INFO "plpar-hcall (%s) " - "Not authorized to this function\n", tag); - return; - case H_PARAMETER: - printk(KERN_INFO "plpar-hcall (%s) " - "Bad parameter(s)\n",tag); - return; - default: - printk(KERN_INFO "plpar-hcall (%s) " - "Unexpected rc(0x%lx)\n", tag, rc); - } + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; } +EXPORT_SYMBOL(h_get_mpp); + +struct hvcall_ppp_data { + u64 entitlement; + u64 unallocated_entitlement; + u16 group_num; + u16 pool_num; + u8 capped; + u8 weight; + u8 unallocated_weight; + u16 active_procs_in_pool; + u16 active_system_procs; +}; /* * H_GET_PPP hcall returns info in 4 parms. @@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag) * XXXX - Active processors in Physical Processor Pool. * XXXX - Processors active on platform. */ -static unsigned int h_get_ppp(unsigned long *entitled, - unsigned long *unallocated, - unsigned long *aggregation, - unsigned long *resource) +static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) { unsigned long rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; rc = plpar_hcall(H_GET_PPP, retbuf); - *entitled = retbuf[0]; - *unallocated = retbuf[1]; - *aggregation = retbuf[2]; - *resource = retbuf[3]; + ppp_data->entitlement = retbuf[0]; + ppp_data->unallocated_entitlement = retbuf[1]; + + ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + ppp_data->pool_num = retbuf[2] & 0xffff; - log_plpar_hcall_return(rc, "H_GET_PPP"); + ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; + ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; + ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; + ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; + ppp_data->active_system_procs = retbuf[3] & 0xffff; return rc; } -static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) +static unsigned h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) { unsigned long rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; @@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) *pool_idle_time = retbuf[0]; *num_procs = retbuf[1]; - if (rc != H_AUTHORITY) - log_plpar_hcall_return(rc, "H_PIC"); + return rc; +} + +/* + * parse_ppp_data + * Parse out the data returned from h_get_ppp and h_pic + */ +static void parse_ppp_data(struct seq_file *m) +{ + struct hvcall_ppp_data ppp_data; + int rc; + + rc = h_get_ppp(&ppp_data); + if (rc) + return; + + seq_printf(m, "partition_entitled_capacity=%ld\n", + ppp_data.entitlement); + seq_printf(m, "group=%d\n", ppp_data.group_num); + seq_printf(m, "system_active_processors=%d\n", + ppp_data.active_system_procs); + + /* pool related entries are apropriate for shared configs */ + if (lppaca[0].shared_proc) { + unsigned long pool_idle_time, pool_procs; + + seq_printf(m, "pool=%d\n", ppp_data.pool_num); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%d\n", + ppp_data.active_procs_in_pool * 100); + + h_pic(&pool_idle_time, &pool_procs); + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%d\n", + ppp_data.unallocated_weight); + seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); + seq_printf(m, "capped=%d\n", ppp_data.capped); + seq_printf(m, "unallocated_capacity=%ld\n", + ppp_data.unallocated_entitlement); +} + +/** + * parse_mpp_data + * Parse out data returned from h_get_mpp + */ +static void parse_mpp_data(struct seq_file *m) +{ + struct hvcall_mpp_data mpp_data; + int rc; + + rc = h_get_mpp(&mpp_data); + if (rc) + return; + + seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); + + if (mpp_data.mapped_mem != -1) + seq_printf(m, "mapped_entitled_memory=%ld\n", + mpp_data.mapped_mem); + + seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); + seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); + + seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); + seq_printf(m, "unallocated_entitled_memory_weight=%d\n", + mpp_data.unallocated_mem_weight); + seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", + mpp_data.unallocated_entitlement); + + if (mpp_data.pool_size != -1) + seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", + mpp_data.pool_size); + + seq_printf(m, "entitled_memory_loan_request=%ld\n", + mpp_data.loan_request); + + seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); } #define SPLPAR_CHARACTERISTICS_TOKEN 20 @@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void) return count; } +static void pseries_cmo_data(struct seq_file *m) +{ + int cpu; + unsigned long cmo_faults = 0; + unsigned long cmo_fault_time = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + for_each_possible_cpu(cpu) { + cmo_faults += lppaca[cpu].cmo_faults; + cmo_fault_time += lppaca[cpu].cmo_fault_time; + } + + seq_printf(m, "cmo_faults=%lu\n", cmo_faults); + seq_printf(m, "cmo_fault_time_usec=%lu\n", + cmo_fault_time / tb_ticks_per_usec); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - unsigned long h_entitled, h_unallocated; - unsigned long h_aggregation, h_resource; - unsigned long pool_idle_time, pool_procs; - unsigned long purr; - - h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation, - &h_resource); - - seq_printf(m, "R4=0x%lx\n", h_entitled); - seq_printf(m, "R5=0x%lx\n", h_unallocated); - seq_printf(m, "R6=0x%lx\n", h_aggregation); - seq_printf(m, "R7=0x%lx\n", h_resource); - - purr = get_purr(); - /* this call handles the ibm,get-system-parameter contents */ parse_system_parameter_string(m); + parse_ppp_data(m); + parse_mpp_data(m); + pseries_cmo_data(m); - seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); - - seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff); - - seq_printf(m, "system_active_processors=%ld\n", - (h_resource >> 0 * 8) & 0xffff); - - /* pool related entries are apropriate for shared configs */ - if (lppaca[0].shared_proc) { - - h_pic(&pool_idle_time, &pool_procs); - - seq_printf(m, "pool=%ld\n", - (h_aggregation >> 0 * 8) & 0xffff); - - /* report pool_capacity in percentage */ - seq_printf(m, "pool_capacity=%ld\n", - ((h_resource >> 2 * 8) & 0xffff) * 100); - - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); - } - - seq_printf(m, "unallocated_capacity_weight=%ld\n", - (h_resource >> 4 * 8) & 0xFF); - - seq_printf(m, "capacity_weight=%ld\n", - (h_resource >> 5 * 8) & 0xFF); - - seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01); - - seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated); - - seq_printf(m, "purr=%ld\n", purr); - + seq_printf(m, "purr=%ld\n", get_purr()); } else { /* non SPLPAR case */ seq_printf(m, "system_active_processors=%d\n", @@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) return 0; } +static ssize_t update_ppp(u64 *entitlement, u8 *weight) +{ + struct hvcall_ppp_data ppp_data; + u8 new_weight; + u64 new_entitled; + ssize_t retval; + + /* Get our current parameters */ + retval = h_get_ppp(&ppp_data); + if (retval) + return retval; + + if (entitlement) { + new_weight = ppp_data.weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = ppp_data.entitlement; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __FUNCTION__, ppp_data.entitlement, ppp_data.weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + __FUNCTION__, new_entitled, new_weight); + + retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); + return retval; +} + +/** + * update_mpp + * + * Update the memory entitlement and weight for the partition. Caller must + * specify either a new entitlement or weight, not both, to be updated + * since the h_set_mpp call takes both entitlement and weight as parameters. + */ +static ssize_t update_mpp(u64 *entitlement, u8 *weight) +{ + struct hvcall_mpp_data mpp_data; + u64 new_entitled; + u8 new_weight; + ssize_t rc; + + if (entitlement) { + /* Check with vio to ensure the new memory entitlement + * can be handled. + */ + rc = vio_cmo_entitlement_update(*entitlement); + if (rc) + return rc; + } + + rc = h_get_mpp(&mpp_data); + if (rc) + return rc; + + if (entitlement) { + new_weight = mpp_data.mem_weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = mpp_data.entitled_mem; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + __FUNCTION__, new_entitled, new_weight); + + rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); + return rc; +} + /* * Interface for changing system parameters (variable capacity weight * and entitled capacity). Format of input is "param_name=value"; @@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) static ssize_t lparcfg_write(struct file *file, const char __user * buf, size_t count, loff_t * off) { - char *kbuf; + int kbuf_sz = 64; + char kbuf[kbuf_sz]; char *tmp; u64 new_entitled, *new_entitled_ptr = &new_entitled; u8 new_weight, *new_weight_ptr = &new_weight; - - unsigned long current_entitled; /* parameters for h_get_ppp */ - unsigned long dummy; - unsigned long resource; - u8 current_weight; - - ssize_t retval = -ENOMEM; + ssize_t retval; if (!firmware_has_feature(FW_FEATURE_SPLPAR) || firmware_has_feature(FW_FEATURE_ISERIES)) return -EINVAL; - kbuf = kmalloc(count, GFP_KERNEL); - if (!kbuf) - goto out; + if (count > kbuf_sz) + return -EINVAL; - retval = -EFAULT; if (copy_from_user(kbuf, buf, count)) - goto out; + return -EFAULT; - retval = -EINVAL; kbuf[count - 1] = '\0'; tmp = strchr(kbuf, '='); if (!tmp) - goto out; + return -EINVAL; *tmp++ = '\0'; @@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, char *endp; *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); if (endp == tmp) - goto out; - new_weight_ptr = ¤t_weight; + return -EINVAL; + + retval = update_ppp(new_entitled_ptr, NULL); } else if (!strcmp(kbuf, "capacity_weight")) { char *endp; *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); if (endp == tmp) - goto out; - new_entitled_ptr = ¤t_entitled; - } else - goto out; - - /* Get our current parameters */ - retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); - if (retval) { - retval = -EIO; - goto out; - } - - current_weight = (resource >> 5 * 8) & 0xFF; + return -EINVAL; - pr_debug("%s: current_entitled = %lu, current_weight = %u\n", - __func__, current_entitled, current_weight); + retval = update_ppp(NULL, new_weight_ptr); + } else if (!strcmp(kbuf, "entitled_memory")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; - pr_debug("%s: new_entitled = %lu, new_weight = %u\n", - __func__, *new_entitled_ptr, *new_weight_ptr); + retval = update_mpp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "entitled_memory_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; - retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, - *new_weight_ptr); + retval = update_mpp(NULL, new_weight_ptr); + } else + return -EINVAL; if (retval == H_SUCCESS || retval == H_CONSTRAINED) { retval = count; @@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, retval = -EIO; } -out: - kfree(kbuf); return retval; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 219f3634115e..db2497ccc111 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -47,6 +47,8 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #endif +#include <linux/kprobes.h> +#include <linux/kdebug.h> extern unsigned long _get_SP(void); @@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void) } #endif /* CONFIG_SMP */ +void do_dabr(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DAC and struct entries. One shot trigger */ +#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE)) + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W + | DBCR0_IDM)); +#endif + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); +} + static DEFINE_PER_CPU(unsigned long, current_dabr); int set_dabr(unsigned long dabr) @@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr) #if defined(CONFIG_PPC64) || defined(CONFIG_6xx) mtspr(SPRN_DABR, dabr); #endif + +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DAC1, dabr); +#endif + return 0; } @@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev, if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) set_dabr(new->thread.dabr); +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + /* If new thread DAC (HW breakpoint) is the same then leave it */ + if (new->thread.dabr) + set_dabr(new->thread.dabr); +#endif + new_thread = &new->thread; old_thread = ¤t->thread; @@ -525,6 +567,10 @@ void flush_thread(void) if (current->thread.dabr) { current->thread.dabr = 0; set_dabr(0); + +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W); +#endif } } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1ea8c8d3ce89..c4ab2195b9cb 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void) #else #define OV5_MSI 0x00 #endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_PPC_SMLPAR +#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#else +#define OV5_CMO 0x00 +#endif /* * The architecture vector has an array of PVR mask/value pairs, @@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = { 0, /* don't halt */ /* option vector 5: PAPR/OF options */ - 3 - 2, /* length */ + 5 - 2, /* length */ 0, /* don't ignore, don't halt */ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, + 0, + OV5_CMO, }; /* Old method - ELF header with PT_NOTE sections */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8feb93e7890c..a5d0e78779c8 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC; + task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; #else regs->msr |= MSR_SE; @@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; + +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + /* If DAC then do not single step, skip */ + if (task->thread.dabr) + return; +#endif + if (regs != NULL) { #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - task->thread.dbcr0 = 0; + task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM); regs->msr &= ~MSR_DE; #else regs->msr &= ~MSR_SE; @@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } -static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { - /* We only support one DABR and no IABRS at the moment */ + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ if (addr > 0) return -EINVAL; - /* The bottom 3 bits are flags */ if ((data & ~0x7UL) >= TASK_SIZE) return -EIO; - /* Ensure translation is on */ +#ifdef CONFIG_PPC64 + + /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. + * It was assumed, on previous implementations, that 3 bits were + * passed together with the data address, fitting the design of the + * DABR register, as follows: + * + * bit 0: Read flag + * bit 1: Write flag + * bit 2: Breakpoint translation + * + * Thus, we use them here as so. + */ + + /* Ensure breakpoint translation bit is set */ if (data && !(data & DABR_TRANSLATION)) return -EIO; + /* Move contents to the DABR register */ task->thread.dabr = data; + +#endif +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + + /* As described above, it was assumed 3 bits were passed with the data + * address, but we will assume only the mode bits will be passed + * as to not cause alignment restrictions for DAC-based processors. + */ + + /* DAC's hold the whole address without any mode flags */ + task->thread.dabr = data & ~0x3UL; + + if (task->thread.dabr == 0) { + task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM); + task->thread.regs->msr &= ~MSR_DE; + return 0; + } + + /* Read or Write bits must be set */ + + if (!(data & 0x3UL)) + return -EINVAL; + + /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 + register */ + task->thread.dbcr0 = DBCR0_IDM; + + /* Check for write and read flags and set DBCR0 + accordingly */ + if (data & 0x1UL) + task->thread.dbcr0 |= DBSR_DAC1R; + if (data & 0x2UL) + task->thread.dbcr0 |= DBSR_DAC1W; + + task->thread.regs->msr |= MSR_DE; +#endif return 0; } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index ad55488939c3..7aada783ec6a 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.dabr) + if (current->thread.dabr) { set_dabr(current->thread.dabr); +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, current->thread.dbcr0); +#endif + } if (is32) { if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index aba0ba95f062..800e5e9a087b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -529,7 +529,8 @@ static void register_nodes(void) #endif /* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, char *buf) +static ssize_t show_physical_id(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 878fbddb6ae1..81ccb8dd1a54 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) } _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { + regs->msr &= ~MSR_DE; + + if (user_mode(regs)) { + current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | + DBCR0_IDM); + } else { + /* Disable DAC interupts */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | + DBSR_DAC1W | DBCR0_IDM)); + + /* Clear the DAC event */ + mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W)); + } + /* Setup and send the trap to the handler */ + do_dabr(regs, mfspr(SPRN_DAC1), debug_status); } } #endif /* CONFIG_4xx || CONFIG_BOOKE */ diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index b77f8af7ddde..ade8aeaa2e70 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1,11 +1,12 @@ /* * IBM PowerPC Virtual I/O Infrastructure Support. * - * Copyright (c) 2003-2005 IBM Corp. + * Copyright (c) 2003,2008 IBM Corp. * Dave Engebretsen engebret@us.ibm.com * Santiago Leon santil@us.ibm.com * Hollis Blanchard <hollisb@us.ibm.com> * Stephen Rothwell + * Robert Jennings <rcjenn@us.ibm.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */ .dev.bus = &vio_bus_type, }; +#ifdef CONFIG_PPC_SMLPAR +/** + * vio_cmo_pool - A pool of IO memory for CMO use + * + * @size: The size of the pool in bytes + * @free: The amount of free memory in the pool + */ +struct vio_cmo_pool { + size_t size; + size_t free; +}; + +/* How many ms to delay queued balance work */ +#define VIO_CMO_BALANCE_DELAY 100 + +/* Portion out IO memory to CMO devices by this chunk size */ +#define VIO_CMO_BALANCE_CHUNK 131072 + +/** + * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement + * + * @vio_dev: struct vio_dev pointer + * @list: pointer to other devices on bus that are being tracked + */ +struct vio_cmo_dev_entry { + struct vio_dev *viodev; + struct list_head list; +}; + +/** + * vio_cmo - VIO bus accounting structure for CMO entitlement + * + * @lock: spinlock for entire structure + * @balance_q: work queue for balancing system entitlement + * @device_list: list of CMO-enabled devices requiring entitlement + * @entitled: total system entitlement in bytes + * @reserve: pool of memory from which devices reserve entitlement, incl. spare + * @excess: pool of excess entitlement not needed for device reserves or spare + * @spare: IO memory for device hotplug functionality + * @min: minimum necessary for system operation + * @desired: desired memory for system operation + * @curr: bytes currently allocated + * @high: high water mark for IO data usage + */ +struct vio_cmo { + spinlock_t lock; + struct delayed_work balance_q; + struct list_head device_list; + size_t entitled; + struct vio_cmo_pool reserve; + struct vio_cmo_pool excess; + size_t spare; + size_t min; + size_t desired; + size_t curr; + size_t high; +} vio_cmo; + +/** + * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows + */ +static int vio_cmo_num_OF_devs(void) +{ + struct device_node *node_vroot; + int count = 0; + + /* + * Count the number of vdevice entries with an + * ibm,my-dma-window OF property + */ + node_vroot = of_find_node_by_name(NULL, "vdevice"); + if (node_vroot) { + struct device_node *of_node; + struct property *prop; + + for_each_child_of_node(node_vroot, of_node) { + prop = of_find_property(of_node, "ibm,my-dma-window", + NULL); + if (prop) + count++; + } + } + of_node_put(node_vroot); + return count; +} + +/** + * vio_cmo_alloc - allocate IO memory for CMO-enable devices + * + * @viodev: VIO device requesting IO memory + * @size: size of allocation requested + * + * Allocations come from memory reserved for the devices and any excess + * IO memory available to all devices. The spare pool used to service + * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be + * made available. + * + * Return codes: + * 0 for successful allocation and -ENOMEM for a failure + */ +static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t reserve_free = 0; + size_t excess_free = 0; + int ret = -ENOMEM; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Determine the amount of free entitlement available in reserve */ + if (viodev->cmo.entitled > viodev->cmo.allocated) + reserve_free = viodev->cmo.entitled - viodev->cmo.allocated; + + /* If spare is not fulfilled, the excess pool can not be used. */ + if (vio_cmo.spare >= VIO_CMO_MIN_ENT) + excess_free = vio_cmo.excess.free; + + /* The request can be satisfied */ + if ((reserve_free + excess_free) >= size) { + vio_cmo.curr += size; + if (vio_cmo.curr > vio_cmo.high) + vio_cmo.high = vio_cmo.curr; + viodev->cmo.allocated += size; + size -= min(reserve_free, size); + vio_cmo.excess.free -= size; + ret = 0; + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return ret; +} + +/** + * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices + * @viodev: VIO device freeing IO memory + * @size: size of deallocation + * + * IO memory is freed by the device back to the correct memory pools. + * The spare pool is replenished first from either memory pool, then + * the reserve pool is used to reduce device entitlement, the excess + * pool is used to increase the reserve pool toward the desired entitlement + * target, and then the remaining memory is returned to the pools. + * + */ +static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t spare_needed = 0; + size_t excess_freed = 0; + size_t reserve_freed = size; + size_t tmp; + int balance = 0; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.curr -= size; + + /* Amount of memory freed from the excess pool */ + if (viodev->cmo.allocated > viodev->cmo.entitled) { + excess_freed = min(reserve_freed, (viodev->cmo.allocated - + viodev->cmo.entitled)); + reserve_freed -= excess_freed; + } + + /* Remove allocation from device */ + viodev->cmo.allocated -= (reserve_freed + excess_freed); + + /* Spare is a subset of the reserve pool, replenish it first. */ + spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare; + + /* + * Replenish the spare in the reserve pool from the excess pool. + * This moves entitlement into the reserve pool. + */ + if (spare_needed && excess_freed) { + tmp = min(excess_freed, spare_needed); + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + vio_cmo.spare += tmp; + excess_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Replenish the spare in the reserve pool from the reserve pool. + * This removes entitlement from the device down to VIO_CMO_MIN_ENT, + * if needed, and gives it to the spare pool. The amount of used + * memory in this pool does not change. + */ + if (spare_needed && reserve_freed) { + tmp = min(spare_needed, min(reserve_freed, + (viodev->cmo.entitled - + VIO_CMO_MIN_ENT))); + + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + reserve_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Increase the reserve pool until the desired allocation is met. + * Move an allocation freed from the excess pool into the reserve + * pool and schedule a balance operation. + */ + if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) { + tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size)); + + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + excess_freed -= tmp; + balance = 1; + } + + /* Return memory from the excess pool to that pool */ + if (excess_freed) + vio_cmo.excess.free += excess_freed; + + if (balance) + schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_entitlement_update - Manage system entitlement changes + * + * @new_entitlement: new system entitlement to attempt to accommodate + * + * Increases in entitlement will be used to fulfill the spare entitlement + * and the rest is given to the excess pool. Decreases, if they are + * possible, come from the excess pool and from unused device entitlement + * + * Returns: 0 on success, -ENOMEM when change can not be made + */ +int vio_cmo_entitlement_update(size_t new_entitlement) +{ + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail, delta, tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Entitlement increases */ + if (new_entitlement > vio_cmo.entitled) { + delta = new_entitlement - vio_cmo.entitled; + + /* Fulfill spare allocation */ + if (vio_cmo.spare < VIO_CMO_MIN_ENT) { + tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare)); + vio_cmo.spare += tmp; + vio_cmo.reserve.size += tmp; + delta -= tmp; + } + + /* Remaining new allocation goes to the excess pool */ + vio_cmo.entitled += delta; + vio_cmo.excess.size += delta; + vio_cmo.excess.free += delta; + + goto out; + } + + /* Entitlement decreases */ + delta = vio_cmo.entitled - new_entitlement; + avail = vio_cmo.excess.free; + + /* + * Need to check how much unused entitlement each device can + * sacrifice to fulfill entitlement change. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (avail >= delta) + break; + + viodev = dev_ent->viodev; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + avail += viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + } + + if (delta <= avail) { + vio_cmo.entitled -= delta; + + /* Take entitlement from the excess pool first */ + tmp = min(vio_cmo.excess.free, delta); + vio_cmo.excess.size -= tmp; + vio_cmo.excess.free -= tmp; + delta -= tmp; + + /* + * Remove all but VIO_CMO_MIN_ENT bytes from devices + * until entitlement change is served + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (!delta) + break; + + viodev = dev_ent->viodev; + tmp = 0; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + tmp = viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + viodev->cmo.entitled -= min(tmp, delta); + delta -= min(tmp, delta); + } + } else { + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + +out: + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_balance - Balance entitlement among devices + * + * @work: work queue structure for this operation + * + * Any system entitlement above the minimum needed for devices, or + * already allocated to devices, can be distributed to the devices. + * The list of devices is iterated through to recalculate the desired + * entitlement level and to determine how much entitlement above the + * minimum entitlement is allocated to devices. + * + * Small chunks of the available entitlement are given to devices until + * their requirements are fulfilled or there is no entitlement left to give. + * Upon completion sizes of the reserve and excess pools are calculated. + * + * The system minimum entitlement level is also recalculated here. + * Entitlement will be reserved for devices even after vio_bus_remove to + * accommodate reloading the driver. The OF tree is walked to count the + * number of devices present and this will remove entitlement for devices + * that have actually left the system after having vio_bus_remove called. + */ +static void vio_cmo_balance(struct work_struct *work) +{ + struct vio_cmo *cmo; + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail = 0, level, chunk, need; + int devcount = 0, fulfilled; + + cmo = container_of(work, struct vio_cmo, balance_q.work); + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Calculate minimum entitlement and fulfill spare */ + cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT; + BUG_ON(cmo->min > cmo->entitled); + cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min)); + cmo->min += cmo->spare; + cmo->desired = cmo->min; + + /* + * Determine how much entitlement is available and reset device + * entitlements + */ + avail = cmo->entitled - cmo->spare; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + devcount++; + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT); + avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT); + } + + /* + * Having provided each device with the minimum entitlement, loop + * over the devices portioning out the remaining entitlement + * until there is nothing left. + */ + level = VIO_CMO_MIN_ENT; + while (avail) { + fulfilled = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + + if (viodev->cmo.desired <= level) { + fulfilled++; + continue; + } + + /* + * Give the device up to VIO_CMO_BALANCE_CHUNK + * bytes of entitlement, but do not exceed the + * desired level of entitlement for the device. + */ + chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK); + chunk = min(chunk, (viodev->cmo.desired - + viodev->cmo.entitled)); + viodev->cmo.entitled += chunk; + + /* + * If the memory for this entitlement increase was + * already allocated to the device it does not come + * from the available pool being portioned out. + */ + need = max(viodev->cmo.allocated, viodev->cmo.entitled)- + max(viodev->cmo.allocated, level); + avail -= need; + + } + if (fulfilled == devcount) + break; + level += VIO_CMO_BALANCE_CHUNK; + } + + /* Calculate new reserve and excess pool sizes */ + cmo->reserve.size = cmo->min; + cmo->excess.free = 0; + cmo->excess.size = 0; + need = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + /* Calculated reserve size above the minimum entitlement */ + if (viodev->cmo.entitled) + cmo->reserve.size += (viodev->cmo.entitled - + VIO_CMO_MIN_ENT); + /* Calculated used excess entitlement */ + if (viodev->cmo.allocated > viodev->cmo.entitled) + need += viodev->cmo.allocated - viodev->cmo.entitled; + } + cmo->excess.size = cmo->entitled - cmo->reserve.size; + cmo->excess.free = cmo->excess.size - need; + + cancel_delayed_work(container_of(work, struct delayed_work, work)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + struct vio_dev *viodev = to_vio_dev(dev); + void *ret; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return NULL; + } + + ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag); + if (unlikely(ret == NULL)) { + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle); + + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); +} + +static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + dma_addr_t ret = DMA_ERROR_CODE; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return ret; + } + + ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs); + if (unlikely(dma_mapping_error(ret))) { + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_unmap_single(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs); + + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); +} + +static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + int ret, count = 0; + size_t alloc_size = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); + + if (vio_cmo_alloc(viodev, alloc_size)) { + atomic_inc(&viodev->cmo.allocs_failed); + return 0; + } + + ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs); + + if (unlikely(!ret)) { + vio_cmo_dealloc(viodev, alloc_size); + atomic_inc(&viodev->cmo.allocs_failed); + } + + for (sgl = sglist, count = 0; count < ret; count++, sgl++) + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + if (alloc_size) + vio_cmo_dealloc(viodev, alloc_size); + + return ret; +} + +static void vio_dma_iommu_unmap_sg(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + size_t alloc_size = 0; + int count = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + + dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); + + vio_cmo_dealloc(viodev, alloc_size); +} + +struct dma_mapping_ops vio_dma_mapping_ops = { + .alloc_coherent = vio_dma_iommu_alloc_coherent, + .free_coherent = vio_dma_iommu_free_coherent, + .map_single = vio_dma_iommu_map_single, + .unmap_single = vio_dma_iommu_unmap_single, + .map_sg = vio_dma_iommu_map_sg, + .unmap_sg = vio_dma_iommu_unmap_sg, +}; + +/** + * vio_cmo_set_dev_desired - Set desired entitlement for a device + * + * @viodev: struct vio_dev for device to alter + * @new_desired: new desired entitlement level in bytes + * + * For use by devices to request a change to their entitlement at runtime or + * through sysfs. The desired entitlement level is changed and a balancing + * of system resources is scheduled to run in the future. + */ +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) +{ + unsigned long flags; + struct vio_cmo_dev_entry *dev_ent; + int found = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (desired < VIO_CMO_MIN_ENT) + desired = VIO_CMO_MIN_ENT; + + /* + * Changes will not be made for devices not in the device list. + * If it is not in the device list, then no driver is loaded + * for the device and it can not receive entitlement. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + found = 1; + break; + } + if (!found) + return; + + /* Increase/decrease in desired device entitlement */ + if (desired >= viodev->cmo.desired) { + /* Just bump the bus and device values prior to a balance*/ + vio_cmo.desired += desired - viodev->cmo.desired; + viodev->cmo.desired = desired; + } else { + /* Decrease bus and device values for desired entitlement */ + vio_cmo.desired -= viodev->cmo.desired - desired; + viodev->cmo.desired = desired; + /* + * If less entitlement is desired than current entitlement, move + * any reserve memory in the change region to the excess pool. + */ + if (viodev->cmo.entitled > desired) { + vio_cmo.reserve.size -= viodev->cmo.entitled - desired; + vio_cmo.excess.size += viodev->cmo.entitled - desired; + /* + * If entitlement moving from the reserve pool to the + * excess pool is currently unused, add to the excess + * free counter. + */ + if (viodev->cmo.allocated < viodev->cmo.entitled) + vio_cmo.excess.free += viodev->cmo.entitled - + max(viodev->cmo.allocated, desired); + viodev->cmo.entitled = desired; + } + } + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_bus_probe - Handle CMO specific bus probe activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Determine the devices IO memory entitlement needs, attempting + * to satisfy the system minimum entitlement at first and scheduling + * a balance operation to take care of the rest at a later time. + * + * Returns: 0 on success, -EINVAL when device doesn't support CMO, and + * -ENOMEM when entitlement is not available for device or + * device entry. + * + */ +static int vio_cmo_bus_probe(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + struct device *dev = &viodev->dev; + struct vio_driver *viodrv = to_vio_driver(dev->driver); + unsigned long flags; + size_t size; + + /* + * Check to see that device has a DMA window and configure + * entitlement for the device. + */ + if (of_get_property(viodev->dev.archdata.of_node, + "ibm,my-dma-window", NULL)) { + /* Check that the driver is CMO enabled and get desired DMA */ + if (!viodrv->get_desired_dma) { + dev_err(dev, "%s: device driver does not support CMO\n", + __func__); + return -EINVAL; + } + + viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); + if (viodev->cmo.desired < VIO_CMO_MIN_ENT) + viodev->cmo.desired = VIO_CMO_MIN_ENT; + size = VIO_CMO_MIN_ENT; + + dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry), + GFP_KERNEL); + if (!dev_ent) + return -ENOMEM; + + dev_ent->viodev = viodev; + spin_lock_irqsave(&vio_cmo.lock, flags); + list_add(&dev_ent->list, &vio_cmo.device_list); + } else { + viodev->cmo.desired = 0; + size = 0; + spin_lock_irqsave(&vio_cmo.lock, flags); + } + + /* + * If the needs for vio_cmo.min have not changed since they + * were last set, the number of devices in the OF tree has + * been constant and the IO memory for this is already in + * the reserve pool. + */ + if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) * + VIO_CMO_MIN_ENT)) { + /* Updated desired entitlement if device requires it */ + if (size) + vio_cmo.desired += (viodev->cmo.desired - + VIO_CMO_MIN_ENT); + } else { + size_t tmp; + + tmp = vio_cmo.spare + vio_cmo.excess.free; + if (tmp < size) { + dev_err(dev, "%s: insufficient free " + "entitlement to add device. " + "Need %lu, have %lu\n", __func__, + size, (vio_cmo.spare + tmp)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + + /* Use excess pool first to fulfill request */ + tmp = min(size, vio_cmo.excess.free); + vio_cmo.excess.free -= tmp; + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + + /* Use spare if excess pool was insufficient */ + vio_cmo.spare -= size - tmp; + + /* Update bus accounting */ + vio_cmo.min += size; + vio_cmo.desired += viodev->cmo.desired; + } + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_bus_remove - Handle CMO specific bus removal activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Remove the device from the cmo device list. The minimum entitlement + * will be reserved for the device as long as it is in the system. The + * rest of the entitlement the device had been allocated will be returned + * to the system. + */ +static void vio_cmo_bus_remove(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (viodev->cmo.allocated) { + dev_err(&viodev->dev, "%s: device had %lu bytes of IO " + "allocated after remove operation.\n", + __func__, viodev->cmo.allocated); + BUG(); + } + + /* + * Remove the device from the device list being maintained for + * CMO enabled devices. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + list_del(&dev_ent->list); + kfree(dev_ent); + break; + } + + /* + * Devices may not require any entitlement and they do not need + * to be processed. Otherwise, return the device's entitlement + * back to the pools. + */ + if (viodev->cmo.entitled) { + /* + * This device has not yet left the OF tree, it's + * minimum entitlement remains in vio_cmo.min and + * vio_cmo.desired + */ + vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT); + + /* + * Save min allocation for device in reserve as long + * as it exists in OF tree as determined by later + * balance operation + */ + viodev->cmo.entitled -= VIO_CMO_MIN_ENT; + + /* Replenish spare from freed reserve pool */ + if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) { + tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT - + vio_cmo.spare)); + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + } + + /* Remaining reserve goes to excess pool */ + vio_cmo.excess.size += viodev->cmo.entitled; + vio_cmo.excess.free += viodev->cmo.entitled; + vio_cmo.reserve.size -= viodev->cmo.entitled; + + /* + * Until the device is removed it will keep a + * minimum entitlement; this will guarantee that + * a module unload/load will result in a success. + */ + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + viodev->cmo.desired = VIO_CMO_MIN_ENT; + atomic_set(&viodev->cmo.allocs_failed, 0); + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) +{ + vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported; + viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops; +} + +/** + * vio_cmo_bus_init - CMO entitlement initialization at bus init time + * + * Set up the reserve and excess entitlement pools based on available + * system entitlement and the number of devices in the OF tree that + * require entitlement in the reserve pool. + */ +static void vio_cmo_bus_init(void) +{ + struct hvcall_mpp_data mpp_data; + int err; + + memset(&vio_cmo, 0, sizeof(struct vio_cmo)); + spin_lock_init(&vio_cmo.lock); + INIT_LIST_HEAD(&vio_cmo.device_list); + INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance); + + /* Get current system entitlement */ + err = h_get_mpp(&mpp_data); + + /* + * On failure, continue with entitlement set to 0, will panic() + * later when spare is reserved. + */ + if (err != H_SUCCESS) { + printk(KERN_ERR "%s: unable to determine system IO "\ + "entitlement. (%d)\n", __func__, err); + vio_cmo.entitled = 0; + } else { + vio_cmo.entitled = mpp_data.entitled_mem; + } + + /* Set reservation and check against entitlement */ + vio_cmo.spare = VIO_CMO_MIN_ENT; + vio_cmo.reserve.size = vio_cmo.spare; + vio_cmo.reserve.size += (vio_cmo_num_OF_devs() * + VIO_CMO_MIN_ENT); + if (vio_cmo.reserve.size > vio_cmo.entitled) { + printk(KERN_ERR "%s: insufficient system entitlement\n", + __func__); + panic("%s: Insufficient system entitlement", __func__); + } + + /* Set the remaining accounting variables */ + vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size; + vio_cmo.excess.free = vio_cmo.excess.size; + vio_cmo.min = vio_cmo.reserve.size; + vio_cmo.desired = vio_cmo.reserve.size; +} + +/* sysfs device functions and data structures for CMO */ + +#define viodev_cmo_rd_attr(name) \ +static ssize_t viodev_cmo_##name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \ +} + +static ssize_t viodev_cmo_allocs_failed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *viodev = to_vio_dev(dev); + return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed)); +} + +static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + atomic_set(&viodev->cmo.allocs_failed, 0); + return count; +} + +static ssize_t viodev_cmo_desired_set(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + size_t new_desired; + int ret; + + ret = strict_strtoul(buf, 10, &new_desired); + if (ret) + return ret; + + vio_cmo_set_dev_desired(viodev, new_desired); + return count; +} + +viodev_cmo_rd_attr(desired); +viodev_cmo_rd_attr(entitled); +viodev_cmo_rd_attr(allocated); + +static ssize_t name_show(struct device *, struct device_attribute *, char *); +static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static struct device_attribute vio_cmo_dev_attrs[] = { + __ATTR_RO(name), + __ATTR_RO(devspec), + __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_desired_show, viodev_cmo_desired_set), + __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), + __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL), + __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset), + __ATTR_NULL +}; + +/* sysfs bus functions and data structures for CMO */ + +#define viobus_cmo_rd_attr(name) \ +static ssize_t \ +viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name); \ +} + +#define viobus_cmo_pool_rd_attr(name, var) \ +static ssize_t \ +viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name.var); \ +} + +static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf, + size_t count) +{ + unsigned long flags; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.high = vio_cmo.curr; + spin_unlock_irqrestore(&vio_cmo.lock, flags); + + return count; +} + +viobus_cmo_rd_attr(entitled); +viobus_cmo_pool_rd_attr(reserve, size); +viobus_cmo_pool_rd_attr(excess, size); +viobus_cmo_pool_rd_attr(excess, free); +viobus_cmo_rd_attr(spare); +viobus_cmo_rd_attr(min); +viobus_cmo_rd_attr(desired); +viobus_cmo_rd_attr(curr); +viobus_cmo_rd_attr(high); + +static struct bus_attribute vio_cmo_bus_attrs[] = { + __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL), + __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL), + __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL), + __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL), + __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL), + __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL), + __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL), + __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL), + __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viobus_cmo_high_show, viobus_cmo_high_reset), + __ATTR_NULL +}; + +static void vio_cmo_sysfs_init(void) +{ + vio_bus_type.dev_attrs = vio_cmo_dev_attrs; + vio_bus_type.bus_attrs = vio_cmo_bus_attrs; +} +#else /* CONFIG_PPC_SMLPAR */ +/* Dummy functions for iSeries platform */ +int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} +static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } +static void vio_cmo_bus_remove(struct vio_dev *viodev) {} +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} +static void vio_cmo_bus_init() {} +static void vio_cmo_sysfs_init() { } +#endif /* CONFIG_PPC_SMLPAR */ +EXPORT_SYMBOL(vio_cmo_entitlement_update); +EXPORT_SYMBOL(vio_cmo_set_dev_desired); + static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) { const unsigned char *dma_window; @@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev) return error; id = vio_match_device(viodrv->id_table, viodev); - if (id) + if (id) { + memset(&viodev->cmo, 0, sizeof(viodev->cmo)); + if (firmware_has_feature(FW_FEATURE_CMO)) { + error = vio_cmo_bus_probe(viodev); + if (error) + return error; + } error = viodrv->probe(viodev, id); + if (error) + vio_cmo_bus_remove(viodev); + } return error; } @@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev) { struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); + struct device *devptr; + int ret = 1; + + /* + * Hold a reference to the device after the remove function is called + * to allow for CMO accounting cleanup for the device. + */ + devptr = get_device(dev); if (viodrv->remove) - return viodrv->remove(viodev); + ret = viodrv->remove(viodev); + + if (!ret && firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_remove(viodev); - /* driver can't remove */ - return 1; + put_device(devptr); + return ret; } /** @@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->unit_address = *unit_address; } viodev->dev.archdata.of_node = of_node_get(of_node); - viodev->dev.archdata.dma_ops = &dma_iommu_ops; + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_set_dma_ops(viodev); + else + viodev->dev.archdata.dma_ops = &dma_iommu_ops; viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); viodev->dev.archdata.numa_node = of_node_to_nid(of_node); @@ -245,6 +1260,9 @@ static int __init vio_bus_init(void) int err; struct device_node *node_vroot; + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_sysfs_init(); + err = bus_register(&vio_bus_type); if (err) { printk(KERN_ERR "failed to register VIO bus\n"); @@ -262,6 +1280,9 @@ static int __init vio_bus_init(void) return err; } + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_init(); + node_vroot = of_find_node_by_name(NULL, "vdevice"); if (node_vroot) { struct device_node *of_node; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a914411bced5..4a8ce62fe112 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS /* The dummy segment contents for the bug workaround mentioned above near PHDRS. */ - .dummy : { + .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) { LONG(0xf177) } :kernel :dummy diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1707d00331fc..565b7a237c84 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs) return 0; } -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -static void do_dabr(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - siginfo_t info; - - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, - 11, SIGSEGV) == NOTIFY_STOP) - return; - - if (debugger_dabr_match(regs)) - return; - - /* Clear the DABR */ - set_dabr(0); - - /* Deliver the signal to userspace */ - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)address; - force_sig_info(SIGTRAP, &info, current); -} -#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ - /* * For 600- and 800-family processors, the error_code parameter is DSISR * for a data fault, SRR1 for an instruction fault. For 400-family processors diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index d664b1bce381..696a5ee4962d 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -1,7 +1,6 @@ config PPC_MPC52xx bool "52xx-based boards" depends on PPC_MULTIPLATFORM && PPC32 - select FSL_SOC select PPC_CLOCK select PPC_PCI_CHOICE @@ -48,6 +47,7 @@ config PPC_MPC5200_BUGFIX config PPC_MPC5200_GPIO bool "MPC5200 GPIO support" depends on PPC_MPC52xx - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB + select GENERIC_GPIO help Enable gpiolib support for mpc5200 based boards diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 208005ca262c..e06420af5fe9 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, } } -static void tce_build_cell(struct iommu_table *tbl, long index, long npages, +static int tce_build_cell(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { @@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages, pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", index, npages, direction, base_pte); + return 0; } static void tce_free_cell(struct iommu_table *tbl, long index, long npages) @@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled; static int __init setup_iommu_fixed(char *str) { + struct device_node *pciep; + if (strcmp(str, "off") == 0) iommu_fixed_disabled = 1; - else if (strcmp(str, "weak") == 0) + /* If we can find a pcie-endpoint in the device tree assume that + * we're on a triblade or a CAB so by default the fixed mapping + * should be set to be weakly ordered; but only if the boot + * option WASN'T set for strong ordering + */ + pciep = of_find_node_by_type(NULL, "pcie-endpoint"); + + if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) iommu_fixed_is_weak = 1; + of_node_put(pciep); + return 1; } __setup("iommu_fixed=", setup_iommu_fixed); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 34654743363d..2deeeba7eccf 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -312,11 +312,28 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, */ node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { + int available_spus; + node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; + + available_spus = 0; mutex_lock(&cbe_spu_info[node].list_mutex); list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->ctx && spu->ctx->gang + && spu->ctx->aff_offset == 0) + available_spus -= + (spu->ctx->gang->contexts - 1); + else + available_spus++; + } + if (available_spus < ctx->gang->contexts) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + continue; + } + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { if ((!mem_aff || spu->has_mem_affinity) && sched_spu(spu)) { mutex_unlock(&cbe_spu_info[node].list_mutex); @@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx) if (list_empty(&ctx->aff_list)) return 0; + if (atomic_read(&ctx->gang->aff_sched_count) == 0) + ctx->gang->aff_ref_spu = NULL; + if (!gang->aff_ref_spu) { if (!(gang->aff_flags & AFF_MERGED)) aff_merge_remaining_ctxs(gang); @@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) if (spu->ctx->flags & SPU_CREATE_NOSCHED) atomic_dec(&cbe_spu_info[spu->node].reserved_spus); - if (ctx->gang){ - mutex_lock(&ctx->gang->aff_mutex); - if (has_affinity(ctx)) { - if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) - ctx->gang->aff_ref_spu = NULL; - } - mutex_unlock(&ctx->gang->aff_mutex); - } + if (ctx->gang) + atomic_dec_if_positive(&ctx->gang->aff_sched_count); spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); @@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx) goto found; mutex_unlock(&cbe_spu_info[node].list_mutex); - mutex_lock(&ctx->gang->aff_mutex); - if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) - ctx->gang->aff_ref_spu = NULL; - mutex_unlock(&ctx->gang->aff_mutex); + atomic_dec(&ctx->gang->aff_sched_count); goto not_found; } mutex_unlock(&ctx->gang->aff_mutex); diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c index 8c0e95766a62..92d20e993ede 100644 --- a/arch/powerpc/platforms/cell/spufs/sputrace.c +++ b/arch/powerpc/platforms/cell/spufs/sputrace.c @@ -196,8 +196,7 @@ static int __init sputrace_init(void) struct proc_dir_entry *entry; int i, error = -ENOMEM; - sputrace_log = kcalloc(sizeof(struct sputrace), - bufsize, GFP_KERNEL); + sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL); if (!sputrace_log) goto out; diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index bc818e4e2033..bb464d1211b2 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -41,7 +41,7 @@ #include <asm/iseries/hv_call_event.h> #include <asm/iseries/iommu.h> -static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, +static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { @@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, index++; uaddr += TCE_PAGE_SIZE; } + return 0; } static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 70541b7a5013..a0ff03a3d8da 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -83,7 +83,7 @@ static u32 *iob_l2_base; static struct iommu_table iommu_table_iobmap; static int iommu_table_iobmap_inited; -static void iobmap_build(struct iommu_table *tbl, long index, +static int iobmap_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) @@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index, uaddr += IOBMAP_PAGE_SIZE; bus_addr += IOBMAP_PAGE_SIZE; } + return 0; } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 757c0296e0b8..97619fd51e39 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG depends on PPC_PSERIES && PPC_EARLY_DEBUG bool "Enable extra debug logging in platforms/pseries" default y + +config PPC_SMLPAR + bool "Support for shared-memory logical partitions" + depends on PPC_PSERIES + select LPARCFG + default n + help + Select this option to enable shared memory partition support. + With this option a system running in an LPAR can be given more + memory than physically available and will allow firmware to + balance memory across many LPARs. + +config CMM + tristate "Collaborative memory management" + depends on PPC_SMLPAR + default y + help + Select this option, if you want to enable the kernel interface + to reduce the memory size of the system. This is accomplished + by allocating pages of memory and put them "on hold". This only + makes sense for a system running in an LPAR where the unused pages + will be reused for other LPARs. The interface allows firmware to + balance memory across many LPARs. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 554c6e42ef2a..dfe574af2dc0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o +obj-$(CONFIG_CMM) += cmm.o diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c new file mode 100644 index 000000000000..c6b3be03168b --- /dev/null +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -0,0 +1,468 @@ +/* + * Collaborative memory management interface. + * + * Copyright (C) 2008 IBM Corporation + * Author(s): Brian King (brking@linux.vnet.ibm.com), + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/ctype.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/oom.h> +#include <linux/sched.h> +#include <linux/stringify.h> +#include <linux/swap.h> +#include <linux/sysdev.h> +#include <asm/firmware.h> +#include <asm/hvcall.h> +#include <asm/mmu.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> + +#include "plpar_wrappers.h" + +#define CMM_DRIVER_VERSION "1.0.0" +#define CMM_DEFAULT_DELAY 1 +#define CMM_DEBUG 0 +#define CMM_DISABLE 0 +#define CMM_OOM_KB 1024 +#define CMM_MIN_MEM_MB 256 +#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) + +static unsigned int delay = CMM_DEFAULT_DELAY; +static unsigned int oom_kb = CMM_OOM_KB; +static unsigned int cmm_debug = CMM_DEBUG; +static unsigned int cmm_disabled = CMM_DISABLE; +static unsigned long min_mem_mb = CMM_MIN_MEM_MB; +static struct sys_device cmm_sysdev; + +MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(CMM_DRIVER_VERSION); + +module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " + "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); +module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " + "[Default=" __stringify(CMM_OOM_KB) "]"); +module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " + "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); +module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " + "[Default=" __stringify(CMM_DEBUG) "]"); + +#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long)) + +#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } + +struct cmm_page_array { + struct cmm_page_array *next; + unsigned long index; + unsigned long page[CMM_NR_PAGES]; +}; + +static unsigned long loaned_pages; +static unsigned long loaned_pages_target; +static unsigned long oom_freed_pages; + +static struct cmm_page_array *cmm_page_list; +static DEFINE_SPINLOCK(cmm_lock); + +static struct task_struct *cmm_thread_ptr; + +/** + * cmm_alloc_pages - Allocate pages and mark them as loaned + * @nr: number of pages to allocate + * + * Return value: + * number of pages requested to be allocated which were not + **/ +static long cmm_alloc_pages(long nr) +{ + struct cmm_page_array *pa, *npa; + unsigned long addr; + long rc; + + cmm_dbg("Begin request for %ld pages\n", nr); + + while (nr) { + addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); + if (!addr) + break; + spin_lock(&cmm_lock); + pa = cmm_page_list; + if (!pa || pa->index >= CMM_NR_PAGES) { + /* Need a new page for the page list. */ + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); + if (!npa) { + pr_info("%s: Can not allocate new page list\n", __FUNCTION__); + free_page(addr); + break; + } + spin_lock(&cmm_lock); + pa = cmm_page_list; + + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + cmm_page_list = pa; + } else + free_page((unsigned long) npa); + } + + if ((rc = plpar_page_set_loaned(__pa(addr)))) { + pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc); + spin_unlock(&cmm_lock); + free_page(addr); + break; + } + + pa->page[pa->index++] = addr; + loaned_pages++; + totalram_pages--; + spin_unlock(&cmm_lock); + nr--; + } + + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_free_pages - Free pages and mark them as active + * @nr: number of pages to free + * + * Return value: + * number of pages requested to be freed which were not + **/ +static long cmm_free_pages(long nr) +{ + struct cmm_page_array *pa; + unsigned long addr; + + cmm_dbg("Begin free of %ld pages.\n", nr); + spin_lock(&cmm_lock); + pa = cmm_page_list; + while (nr) { + if (!pa || pa->index <= 0) + break; + addr = pa->page[--pa->index]; + + if (pa->index == 0) { + pa = pa->next; + free_page((unsigned long) cmm_page_list); + cmm_page_list = pa; + } + + plpar_page_set_active(__pa(addr)); + free_page(addr); + loaned_pages--; + nr--; + totalram_pages++; + } + spin_unlock(&cmm_lock); + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_oom_notify - OOM notifier + * @self: notifier block struct + * @dummy: not used + * @parm: returned - number of pages freed + * + * Return value: + * NOTIFY_OK + **/ +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = KB2PAGES(oom_kb); + + cmm_dbg("OOM processing started\n"); + nr = cmm_free_pages(nr); + loaned_pages_target = loaned_pages; + *freed += KB2PAGES(oom_kb) - nr; + oom_freed_pages += KB2PAGES(oom_kb) - nr; + cmm_dbg("OOM processing complete\n"); + return NOTIFY_OK; +} + +/** + * cmm_get_mpp - Read memory performance parameters + * + * Makes hcall to query the current page loan request from the hypervisor. + * + * Return value: + * nothing + **/ +static void cmm_get_mpp(void) +{ + int rc; + struct hvcall_mpp_data mpp_data; + unsigned long active_pages_target; + signed long page_loan_request; + + rc = h_get_mpp(&mpp_data); + + if (rc != H_SUCCESS) + return; + + page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE); + loaned_pages_target = page_loan_request + loaned_pages; + if (loaned_pages_target > oom_freed_pages) + loaned_pages_target -= oom_freed_pages; + else + loaned_pages_target = 0; + + active_pages_target = totalram_pages + loaned_pages - loaned_pages_target; + + if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE)) + loaned_pages_target = totalram_pages + loaned_pages - + ((min_mem_mb * 1024 * 1024) / PAGE_SIZE); + + cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", + page_loan_request, loaned_pages, loaned_pages_target, + oom_freed_pages, totalram_pages); +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify +}; + +/** + * cmm_thread - CMM task thread + * @dummy: not used + * + * Return value: + * 0 + **/ +static int cmm_thread(void *dummy) +{ + unsigned long timeleft; + + while (1) { + timeleft = msleep_interruptible(delay * 1000); + + if (kthread_should_stop() || timeleft) { + loaned_pages_target = loaned_pages; + break; + } + + cmm_get_mpp(); + + if (loaned_pages_target > loaned_pages) { + if (cmm_alloc_pages(loaned_pages_target - loaned_pages)) + loaned_pages_target = loaned_pages; + } else if (loaned_pages_target < loaned_pages) + cmm_free_pages(loaned_pages - loaned_pages_target); + } + return 0; +} + +#define CMM_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) + +CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages)); +CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); + +static ssize_t show_oom_pages(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages)); +} + +static ssize_t store_oom_pages(struct sys_device *dev, + const char *buf, size_t count) +{ + unsigned long val = simple_strtoul (buf, NULL, 10); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (val != 0) + return -EBADMSG; + + oom_freed_pages = 0; + return count; +} + +static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO, + show_oom_pages, store_oom_pages); + +static struct sysdev_attribute *cmm_attrs[] = { + &attr_loaned_kb, + &attr_loaned_target_kb, + &attr_oom_freed_kb, +}; + +static struct sysdev_class cmm_sysdev_class = { + .name = "cmm", +}; + +/** + * cmm_sysfs_register - Register with sysfs + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_sysfs_register(struct sys_device *sysdev) +{ + int i, rc; + + if ((rc = sysdev_class_register(&cmm_sysdev_class))) + return rc; + + sysdev->id = 0; + sysdev->cls = &cmm_sysdev_class; + + if ((rc = sysdev_register(sysdev))) + goto class_unregister; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { + if ((rc = sysdev_create_file(sysdev, cmm_attrs[i]))) + goto fail; + } + + return 0; + +fail: + while (--i >= 0) + sysdev_remove_file(sysdev, cmm_attrs[i]); + sysdev_unregister(sysdev); +class_unregister: + sysdev_class_unregister(&cmm_sysdev_class); + return rc; +} + +/** + * cmm_unregister_sysfs - Unregister from sysfs + * + **/ +static void cmm_unregister_sysfs(struct sys_device *sysdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) + sysdev_remove_file(sysdev, cmm_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&cmm_sysdev_class); +} + +/** + * cmm_init - Module initialization + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_init(void) +{ + int rc = -ENOMEM; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return -EOPNOTSUPP; + + if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0) + return rc; + + if ((rc = cmm_sysfs_register(&cmm_sysdev))) + goto out_oom_notifier; + + if (cmm_disabled) + return rc; + + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) { + rc = PTR_ERR(cmm_thread_ptr); + goto out_unregister_sysfs; + } + + return rc; + +out_unregister_sysfs: + cmm_unregister_sysfs(&cmm_sysdev); +out_oom_notifier: + unregister_oom_notifier(&cmm_oom_nb); + return rc; +} + +/** + * cmm_exit - Module exit + * + * Return value: + * nothing + **/ +static void cmm_exit(void) +{ + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + unregister_oom_notifier(&cmm_oom_nb); + cmm_free_pages(loaned_pages); + cmm_unregister_sysfs(&cmm_sysdev); +} + +/** + * cmm_set_disable - Disable/Enable CMM + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_set_disable(const char *val, struct kernel_param *kp) +{ + int disable = simple_strtoul(val, NULL, 10); + + if (disable != 0 && disable != 1) + return -EINVAL; + + if (disable && !cmm_disabled) { + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + cmm_thread_ptr = NULL; + cmm_free_pages(loaned_pages); + } else if (!disable && cmm_disabled) { + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) + return PTR_ERR(cmm_thread_ptr); + } + + cmm_disabled = disable; + return 0; +} + +module_param_call(disable, cmm_set_disable, param_get_uint, + &cmm_disabled, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " + "[Default=" __stringify(CMM_DISABLE) "]"); + +module_init(cmm_init); +module_exit(cmm_exit); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 5377dd4b849a..a8c446697f9e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -48,7 +48,7 @@ #include "plpar_wrappers.h" -static void tce_build_pSeries(struct iommu_table *tbl, long index, +static int tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) @@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index, uaddr += TCE_PAGE_SIZE; tcep++; } + return 0; } @@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return *tcep; } -static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); + +static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { - u64 rc; + u64 rc = 0; u64 proto_tce, tce; u64 rpn; + int ret = 0; + long tcenum_start = tcenum, npages_start = npages; rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; @@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_free_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + 1))); + break; + } + if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); @@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum++; rpn++; } + return ret; } static DEFINE_PER_CPU(u64 *, tce_page) = NULL; -static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { - u64 rc; + u64 rc = 0; u64 proto_tce; u64 *tcep; u64 rpn; long l, limit; + long tcenum_start = tcenum, npages_start = npages; + int ret = 0; if (npages == 1) { - tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); - return; + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction, attrs); } tcep = __get_cpu_var(tce_page); @@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (u64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { - tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); - return; } __get_cpu_var(tce_page) = tcep; } @@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum += limit; } while (npages > 0 && !rc); + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_freemulti_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + limit))); + return ret; + } + if (rc && printk_ratelimit()) { printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); @@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, printk("\ttce[0] val = 0x%lx\n", tcep[0]); show_stack(current, (unsigned long *)__get_SP()); } + return ret; } static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index d8680b589dc9..a437267c6bf8 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa) return vpa_call(0x3, cpu, vpa); } +static inline long plpar_page_set_loaned(unsigned long vpa) +{ + return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0); +} + +static inline long plpar_page_set_active(unsigned long vpa) +{ + return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0); +} + extern void vpa_init(int cpu); static inline long plpar_pte_enter(unsigned long flags, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 90beb444e1dd..063a0d2fba30 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr) H_DABRX_KERNEL | H_DABRX_USER); } +#define CMO_CHARACTERISTICS_TOKEN 44 +#define CMO_MAXLENGTH 1026 + +/** + * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, + * handle that here. (Stolen from parse_system_parameter_string) + */ +void pSeries_cmo_feature_init(void) +{ + char *ptr, *key, *value, *end; + int call_status; + int PrPSP = -1; + int SecPSP = -1; + + pr_debug(" -> fw_cmo_feature_init()\n"); + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + CMO_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + + if (call_status != 0) { + spin_unlock(&rtas_data_buf_lock); + pr_debug("CMO not available\n"); + pr_debug(" <- fw_cmo_feature_init()\n"); + return; + } + + end = rtas_data_buf + CMO_MAXLENGTH - 2; + ptr = rtas_data_buf + 2; /* step over strlen value */ + key = value = ptr; + + while (*ptr && (ptr <= end)) { + /* Separate the key and value by replacing '=' with '\0' and + * point the value at the string after the '=' + */ + if (ptr[0] == '=') { + ptr[0] = '\0'; + value = ptr + 1; + } else if (ptr[0] == '\0' || ptr[0] == ',') { + /* Terminate the string containing the key/value pair */ + ptr[0] = '\0'; + + if (key == value) { + pr_debug("Malformed key/value pair\n"); + /* Never found a '=', end processing */ + break; + } + + if (0 == strcmp(key, "PrPSP")) + PrPSP = simple_strtol(value, NULL, 10); + else if (0 == strcmp(key, "SecPSP")) + SecPSP = simple_strtol(value, NULL, 10); + value = key = ptr + 1; + } + ptr++; + } + + if (PrPSP != -1 || SecPSP != -1) { + pr_info("CMO enabled\n"); + pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP); + powerpc_firmware_features |= FW_FEATURE_CMO; + } else + pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP); + spin_unlock(&rtas_data_buf_lock); + pr_debug(" <- fw_cmo_feature_init()\n"); +} + /* * Early initialization. Relocation is on but do not reference unbolted pages */ @@ -329,6 +399,7 @@ static void __init pSeries_init_early(void) else if (firmware_has_feature(FW_FEATURE_XDABR)) ppc_md.set_dabr = pseries_set_xdabr; + pSeries_cmo_feature_init(); iommu_init_early_pSeries(); pr_debug(" <- pSeries_init_early()\n"); diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index de8c8b542cfa..89639ecbf381 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl) } } -static void dart_build(struct iommu_table *tbl, long index, +static int dart_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) @@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index, } else { dart_dirty = 1; } + return 0; } diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig index 4bb18f57901e..1ce546462be5 100644 --- a/arch/powerpc/sysdev/qe_lib/Kconfig +++ b/arch/powerpc/sysdev/qe_lib/Kconfig @@ -29,7 +29,7 @@ config QE_GPIO bool "QE GPIO support" depends on QUICC_ENGINE select GENERIC_GPIO - select HAVE_GPIO_LIB + select ARCH_REQUIRE_GPIOLIB help Say Y here if you're going to use hardware that connects to the QE GPIOs. diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eb530b4128ba..2ed88122be93 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL select VIRTIO select VIRTIO_RING + select VIRTIO_CONSOLE help Select this option if you want to run the kernel under s390 linux endmenu diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 288ad490a6dd..4f82e5b5f879 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __ctl_store(kcb->kprobe_saved_ctl, 9, 11); } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b358e18273b0..62122bad1e33 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include <asm/sections.h> #include <asm/ebcdic.h> #include <asm/compat.h> +#include <asm/kvm_virtio.h> long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | PSW_MASK_MCHECK | PSW_DEFAULT_KEY); @@ -766,7 +767,8 @@ setup_arch(char **cmdline_p) printk("We are running under VM (64 bit mode)\n"); else if (MACHINE_IS_KVM) { printk("We are running under KVM (64 bit mode)\n"); - add_preferred_console("ttyS", 1, NULL); + add_preferred_console("hvc", 0, NULL); + s390_virtio_console_init(); } else printk("We are running native (64 bit mode)\n"); #endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 212d618b0095..632b13e10053 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -9,7 +9,6 @@ #include <linux/device.h> #include <linux/bootmem.h> #include <linux/sched.h> -#include <linux/kthread.h> #include <linux/workqueue.h> #include <linux/cpu.h> #include <linux/smp.h> @@ -230,20 +229,9 @@ void arch_update_cpu_topology(void) } } -static int topology_kthread(void *data) -{ - arch_reinit_sched_domains(); - return 0; -} - static void topology_work_fn(struct work_struct *work) { - /* We can't call arch_reinit_sched_domains() from a multi-threaded - * workqueue context since it may deadlock in case of cpu hotplug. - * So we have to create a kernel thread in order to call - * arch_reinit_sched_domains(). - */ - kthread_run(topology_kthread, NULL, "topology_update"); + arch_reinit_sched_domains(); } void topology_schedule_update(void) diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c index adcea31e663e..f386997e4d9c 100644 --- a/arch/sh/boot/compressed/misc_32.c +++ b/arch/sh/boot/compressed/misc_32.c @@ -74,8 +74,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */ static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); extern char input_data[]; extern int input_len; @@ -84,11 +82,7 @@ static long bytes_out = 0; static uch *output_data; static unsigned long output_ptr = 0; -static void *malloc(int size); -static void free(void *where); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); int puts(const char *); @@ -101,38 +95,6 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size <0) error("Malloc error"); - if (free_mem_ptr == 0) error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("Out of memory"); - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (long) *ptr; -} - #ifdef CONFIG_SH_STANDARD_BIOS size_t strlen(const char *s) { diff --git a/arch/sh/boot/compressed/misc_64.c b/arch/sh/boot/compressed/misc_64.c index a006ef89b9dd..2941657e18aa 100644 --- a/arch/sh/boot/compressed/misc_64.c +++ b/arch/sh/boot/compressed/misc_64.c @@ -72,8 +72,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */ static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); extern char input_data[]; extern int input_len; @@ -82,11 +80,7 @@ static long bytes_out = 0; static uch *output_data; static unsigned long output_ptr = 0; -static void *malloc(int size); -static void free(void *where); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); static void puts(const char *); @@ -99,40 +93,6 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size < 0) - error("Malloc error\n"); - if (free_mem_ptr == 0) - error("Memory error\n"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *) free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("\nOut of memory\n"); - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (long) *ptr; -} - void puts(const char *s) { } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 789724e61e83..375de7c6d082 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -298,20 +298,6 @@ config UNIX98_PTYS Read the instructions in <file:Documentation/Changes> pertaining to pseudo terminals. It's safe to say N. -config UNIX98_PTY_COUNT - int "Maximum number of Unix98 PTYs in use (0-2048)" - depends on UNIX98_PTYS - default "256" - help - The maximum number of Unix98 PTYs that can be used at any one time. - The default is 256, and should be enough for desktop systems. Server - machines which support incoming telnet/rlogin/ssh connections and/or - serve several X terminals may want to increase this: every incoming - connection and every xterm uses up one PTY. - - When not in use, each additional set of 256 PTYs occupy - approximately 8 KB of kernel memory on 32-bit architectures. - endmenu source "fs/Kconfig" diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index f43b5d755354..201a6e547e4a 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -/* Called with kretprobe_lock held. The value stored in the return - * address register is actually 2 instructions before where the - * callee will return to. Sequences usually look something like this +/* The value stored in the return address register is actually 2 + * instructions before where the callee will return to. + * Sequences usually look something like this * * call some_function <--- return register points here * nop <--- call delay slot @@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->tnpc = orig_ret_address + 4; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b2ddfcf01728..e3cba0b45600 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,11 +23,13 @@ config X86 select HAVE_OPROFILE select HAVE_IOREMAP_PROT select HAVE_KPROBES + select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X select HAVE_KRETPROBES select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_EFFICIENT_UNALIGNED_ACCESS config ARCH_DEFCONFIG string diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index bc5553b496f7..9fea73706479 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -182,8 +182,6 @@ static unsigned outcnt; static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); /* * This is set up by the setup-routine at boot-time @@ -196,9 +194,6 @@ extern int input_len; static long bytes_out; -static void *malloc(int size); -static void free(void *where); - static void *memset(void *s, int c, unsigned n); static void *memcpy(void *dest, const void *src, unsigned n); @@ -220,40 +215,6 @@ static int lines, cols; #include "../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size < 0) - error("Malloc error"); - if (free_mem_ptr <= 0) - error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("Out of memory"); - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (memptr) *ptr; -} - static void scroll(void) { int i; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 43c019f85f0d..6c27679ec6aa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)p->ainsn.insn; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; @@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 151f2d171f7c..19e7fc7c2c4f 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/crash_dump.h> #include <linux/dma-mapping.h> #include <linux/bitops.h> #include <linux/pci_ids.h> @@ -167,6 +168,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl); static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); static void calioc2_tce_cache_blast(struct iommu_table *tbl); static void calioc2_dump_error_regs(struct iommu_table *tbl); +static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); +static void get_tce_space_from_tar(void); static struct cal_chipset_ops calgary_chip_ops = { .handle_quirks = calgary_handle_quirks, @@ -830,7 +833,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) tbl = pci_iommu(dev->bus); tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; - tce_free(tbl, 0, tbl->it_size); + + if (is_kdump_kernel()) + calgary_init_bitmap_from_tce_table(tbl); + else + tce_free(tbl, 0, tbl->it_size); if (is_calgary(dev->device)) tbl->chip_ops = &calgary_chip_ops; @@ -1209,6 +1216,10 @@ static int __init calgary_init(void) if (ret) return ret; + /* Purely for kdump kernel case */ + if (is_kdump_kernel()) + get_tce_space_from_tar(); + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); if (!dev) @@ -1339,6 +1350,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) return (val != 0xffffffff); } +/* + * calgary_init_bitmap_from_tce_table(): + * Funtion for kdump case. In the second/kdump kernel initialize + * the bitmap based on the tce table entries obtained from first kernel + */ +static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) +{ + u64 *tp; + unsigned int index; + tp = ((u64 *)tbl->it_base); + for (index = 0 ; index < tbl->it_size; index++) { + if (*tp != 0x0) + set_bit(index, tbl->it_map); + tp++; + } +} + +/* + * get_tce_space_from_tar(): + * Function for kdump case. Get the tce tables from first kernel + * by reading the contents of the base adress register of calgary iommu + */ +static void get_tce_space_from_tar() +{ + int bus; + void __iomem *target; + unsigned long tce_space; + + for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { + struct calgary_bus_info *info = &bus_info[bus]; + unsigned short pci_device; + u32 val; + + val = read_pci_config(bus, 0, 0, 0); + pci_device = (val & 0xFFFF0000) >> 16; + + if (!is_cal_pci_dev(pci_device)) + continue; + if (info->translation_disabled) + continue; + + if (calgary_bus_has_devices(bus, pci_device) || + translate_empty_slots) { + target = calgary_reg(bus_info[bus].bbar, + tar_offset(bus)); + tce_space = be64_to_cpu(readq(target)); + tce_space = tce_space & TAR_SW_BITS; + + tce_space = tce_space & (~specified_table_size); + info->tce_space = (u64 *)__va(tce_space); + } + } + return; +} + void __init detect_calgary(void) { int bus; @@ -1394,7 +1460,8 @@ void __init detect_calgary(void) return; } - specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); + specified_table_size = determine_tce_table_size((is_kdump_kernel() ? + saved_max_pfn : max_pfn) * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { struct calgary_bus_info *info = &bus_info[bus]; @@ -1412,10 +1479,16 @@ void __init detect_calgary(void) if (calgary_bus_has_devices(bus, pci_device) || translate_empty_slots) { - tbl = alloc_tce_table(); - if (!tbl) - goto cleanup; - info->tce_space = tbl; + /* + * If it is kdump kernel, find and use tce tables + * from first kernel, else allocate tce tables here + */ + if (!is_kdump_kernel()) { + tbl = alloc_tce_table(); + if (!tbl) + goto cleanup; + info->tce_space = tbl; + } calgary_found = 1; } } diff --git a/block/ioctl.c b/block/ioctl.c index 52d6385216ad..77185e5c026a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -17,6 +17,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user long long start, length; int part; int i; + int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -61,9 +62,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user } } /* all seems OK */ - add_partition(disk, part, start, length, ADDPART_FLAG_NONE); + err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE); mutex_unlock(&bdev->bd_mutex); - return 0; + return err; case BLKPG_DEL_PARTITION: if (!disk->part[part-1]) return -ENXIO; diff --git a/drivers/Makefile b/drivers/Makefile index 808e0ae66aa8..54ec5e718c0e 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -5,7 +5,7 @@ # Rewritten to use lists instead of if-statements. # -obj-$(CONFIG_HAVE_GPIO_LIB) += gpio/ +obj-y += gpio/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ obj-$(CONFIG_RAPIDIO) += rapidio/ diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index dc7596f028b6..ef3e5522e1a4 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1273,7 +1273,7 @@ static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state, void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR]; u32 em_ctl; u32 message[] = {0, 0}; - unsigned int flags; + unsigned long flags; int pmp; struct ahci_em_priv *emp; diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index b0be1d18fee2..c9c92b00fd55 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -184,7 +184,7 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, struct device *dev = to_dev(kobj); struct firmware_priv *fw_priv = dev_get_drvdata(dev); struct firmware *fw; - ssize_t ret_count = count; + ssize_t ret_count; mutex_lock(&fw_lock); fw = fw_priv->fw; @@ -192,14 +192,8 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, ret_count = -ENODEV; goto out; } - if (offset > fw->size) { - ret_count = 0; - goto out; - } - if (offset + ret_count > fw->size) - ret_count = fw->size - offset; - - memcpy(buffer, fw->data + offset, ret_count); + ret_count = memory_read_from_buffer(buffer, count, &offset, + fw->data, fw->size); out: mutex_unlock(&fw_lock); return ret_count; diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index c04440cd6a32..181ebb85f0be 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -6,6 +6,7 @@ #include <linux/hdreg.h> #include <linux/blkdev.h> +#include <linux/completion.h> #include <linux/delay.h> #include <linux/smp_lock.h> #include "aoe.h" @@ -36,7 +37,7 @@ struct ErrMsg { static struct ErrMsg emsgs[NMSG]; static int emsgs_head_idx, emsgs_tail_idx; -static struct semaphore emsgs_sema; +static struct completion emsgs_comp; static spinlock_t emsgs_lock; static int nblocked_emsgs_readers; static struct class *aoe_class; @@ -141,7 +142,7 @@ bail: spin_unlock_irqrestore(&emsgs_lock, flags); spin_unlock_irqrestore(&emsgs_lock, flags); if (nblocked_emsgs_readers) - up(&emsgs_sema); + complete(&emsgs_comp); } static ssize_t @@ -221,7 +222,7 @@ aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off) spin_unlock_irqrestore(&emsgs_lock, flags); - n = down_interruptible(&emsgs_sema); + n = wait_for_completion_interruptible(&emsgs_comp); spin_lock_irqsave(&emsgs_lock, flags); @@ -269,7 +270,7 @@ aoechr_init(void) printk(KERN_ERR "aoe: can't register char device\n"); return n; } - sema_init(&emsgs_sema, 0); + init_completion(&emsgs_comp); spin_lock_init(&emsgs_lock); aoe_class = class_create(THIS_MODULE, "aoe"); if (IS_ERR(aoe_class)) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd7ea203f940..42251095134f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) int err; u64 cap; u32 v; + u32 blk_size; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; @@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err) blk_queue_max_hw_segments(vblk->disk->queue, v); + /* Host can optionally specify the block size of the device */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, + offsetof(struct virtio_blk_config, blk_size), + &blk_size); + if (!err) + blk_queue_hardsect_size(vblk->disk->queue, blk_size); + add_disk(vblk->disk); return 0; @@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, - VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, }; static struct virtio_driver virtio_blk = { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 67b07576f8bf..6c070dc5f2d4 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -578,11 +578,14 @@ config HVC_DRIVER It will automatically be selected if one of the back-end console drivers is selected. +config HVC_IRQ + bool config HVC_CONSOLE bool "pSeries Hypervisor Virtual Console support" depends on PPC_PSERIES select HVC_DRIVER + select HVC_IRQ help pSeries machines when partitioned support a hypervisor virtual console. This driver allows each pSeries partition to have a console @@ -593,6 +596,7 @@ config HVC_ISERIES depends on PPC_ISERIES default y select HVC_DRIVER + select HVC_IRQ help iSeries machines support a hypervisor virtual console. @@ -614,13 +618,18 @@ config HVC_XEN bool "Xen Hypervisor Console support" depends on XEN select HVC_DRIVER + select HVC_IRQ default y help Xen virtual console device driver config VIRTIO_CONSOLE - bool + tristate "Virtio console" + depends on VIRTIO select HVC_DRIVER + help + Virtio console for use with lguest and other hypervisors. + config HVCS tristate "IBM Hypervisor Virtual Console Server support" diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 4b6e736cfa02..f7a0d1a754fc 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o obj-$(CONFIG_HVC_BEAT) += hvc_beat.o obj-$(CONFIG_HVC_DRIVER) += hvc_console.o +obj-$(CONFIG_HVC_IRQ) += hvc_irq.o obj-$(CONFIG_HVC_XEN) += hvc_xen.o obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o obj-$(CONFIG_RAW_DRIVER) += raw.o @@ -63,7 +64,6 @@ obj-$(CONFIG_BRIQ_PANEL) += briq_panel.o obj-$(CONFIG_BFIN_OTP) += bfin-otp.o obj-$(CONFIG_PRINTER) += lp.o -obj-$(CONFIG_TIPAR) += tipar.o obj-$(CONFIG_APM_EMULATION) += apm-emulation.o diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c index fada6ddefbae..c5e67a623951 100644 --- a/drivers/char/ds1302.c +++ b/drivers/char/ds1302.c @@ -20,10 +20,11 @@ #include <linux/miscdevice.h> #include <linux/delay.h> #include <linux/bcd.h> +#include <linux/smp_lock.h> +#include <linux/uaccess.h> +#include <linux/io.h> -#include <asm/uaccess.h> #include <asm/system.h> -#include <asm/io.h> #include <asm/rtc.h> #if defined(CONFIG_M32R) #include <asm/m32r.h> @@ -153,9 +154,7 @@ static unsigned char days_in_mo[] = /* ioctl that supports RTC_RD_TIME and RTC_SET_TIME (read and set time/date). */ -static int -rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) +static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { unsigned long flags; @@ -165,7 +164,9 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, struct rtc_time rtc_tm; memset(&rtc_tm, 0, sizeof (struct rtc_time)); + lock_kernel(); get_rtc_time(&rtc_tm); + unlock_kernel(); if (copy_to_user((struct rtc_time*)arg, &rtc_tm, sizeof(struct rtc_time))) return -EFAULT; return 0; @@ -217,6 +218,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, BIN_TO_BCD(mon); BIN_TO_BCD(yrs); + lock_kernel(); local_irq_save(flags); CMOS_WRITE(yrs, RTC_YEAR); CMOS_WRITE(mon, RTC_MONTH); @@ -225,6 +227,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, CMOS_WRITE(min, RTC_MINUTES); CMOS_WRITE(sec, RTC_SECONDS); local_irq_restore(flags); + unlock_kernel(); /* Notice that at this point, the RTC is updated but * the kernel is still running with the old time. @@ -244,8 +247,10 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, if(copy_from_user(&tcs_val, (int*)arg, sizeof(int))) return -EFAULT; + lock_kernel(); tcs_val = RTC_TCR_PATTERN | (tcs_val & 0x0F); ds1302_writereg(RTC_TRICKLECHARGER, tcs_val); + unlock_kernel(); return 0; } default: @@ -282,7 +287,7 @@ get_rtc_status(char *buf) static const struct file_operations rtc_fops = { .owner = THIS_MODULE, - .ioctl = rtc_ioctl, + .unlocked_ioctl = rtc_ioctl, }; /* Probe for the chip by writing something to its RAM and try reading it back. */ diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c index 33c466a4888f..19b88504e960 100644 --- a/drivers/char/dsp56k.c +++ b/drivers/char/dsp56k.c @@ -36,10 +36,10 @@ #include <linux/smp_lock.h> #include <linux/firmware.h> #include <linux/platform_device.h> +#include <linux/uaccess.h> /* For put_user and get_user */ #include <asm/atarihw.h> #include <asm/traps.h> -#include <asm/uaccess.h> /* For put_user and get_user */ #include <asm/dsp56k.h> @@ -303,8 +303,8 @@ static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t co } } -static int dsp56k_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static long dsp56k_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { int dev = iminor(inode) & 0x0f; void __user *argp = (void __user *)arg; @@ -331,8 +331,9 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file, if (len > DSP56K_MAX_BINARY_LENGTH) { return -EINVAL; } - + lock_kernel(); r = dsp56k_upload(bin, len); + unlock_kernel(); if (r < 0) { return r; } @@ -342,12 +343,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file, case DSP56K_SET_TX_WSIZE: if (arg > 4 || arg < 1) return -EINVAL; + lock_kernel(); dsp56k.tx_wsize = (int) arg; + unlock_kernel(); break; case DSP56K_SET_RX_WSIZE: if (arg > 4 || arg < 1) return -EINVAL; + lock_kernel(); dsp56k.rx_wsize = (int) arg; + unlock_kernel(); break; case DSP56K_HOST_FLAGS: { @@ -359,6 +364,7 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file, if(get_user(out, &hf->out) < 0) return -EFAULT; + lock_kernel(); if ((dir & 0x1) && (out & 0x1)) dsp56k_host_interface.icr |= DSP56K_ICR_HF0; else if (dir & 0x1) @@ -373,14 +379,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file, if (dsp56k_host_interface.icr & DSP56K_ICR_HF1) status |= 0x2; if (dsp56k_host_interface.isr & DSP56K_ISR_HF2) status |= 0x4; if (dsp56k_host_interface.isr & DSP56K_ISR_HF3) status |= 0x8; - + unlock_kernel(); return put_user(status, &hf->status); } case DSP56K_HOST_CMD: if (arg > 31 || arg < 0) return -EINVAL; + lock_kernel(); dsp56k_host_interface.cvr = (u_char)((arg & DSP56K_CVR_HV_MASK) | DSP56K_CVR_HC); + unlock_kernel(); break; default: return -EINVAL; @@ -472,7 +480,7 @@ static const struct file_operations dsp56k_fops = { .owner = THIS_MODULE, .read = dsp56k_read, .write = dsp56k_write, - .ioctl = dsp56k_ioctl, + .unlocked_ioctl = dsp56k_ioctl, .open = dsp56k_open, .release = dsp56k_release, }; diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c index d57ca3e4e534..67fbd7aab5db 100644 --- a/drivers/char/efirtc.c +++ b/drivers/char/efirtc.c @@ -37,8 +37,9 @@ #include <linux/rtc.h> #include <linux/proc_fs.h> #include <linux/efi.h> +#include <linux/smp_lock.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/system.h> #define EFI_RTC_VERSION "0.4" @@ -51,8 +52,8 @@ static DEFINE_SPINLOCK(efi_rtc_lock); -static int efi_rtc_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg); +static long efi_rtc_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); #define is_leap(year) \ ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) @@ -146,9 +147,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime) } } -static int -efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) +static long efi_rtc_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { efi_status_t status; @@ -175,13 +175,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, return -EINVAL; case RTC_RD_TIME: - + lock_kernel(); spin_lock_irqsave(&efi_rtc_lock, flags); status = efi.get_time(&eft, &cap); spin_unlock_irqrestore(&efi_rtc_lock,flags); - + unlock_kernel(); if (status != EFI_SUCCESS) { /* should never happen */ printk(KERN_ERR "efitime: can't read time\n"); @@ -203,11 +203,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, convert_to_efi_time(&wtime, &eft); + lock_kernel(); spin_lock_irqsave(&efi_rtc_lock, flags); status = efi.set_time(&eft); spin_unlock_irqrestore(&efi_rtc_lock,flags); + unlock_kernel(); return status == EFI_SUCCESS ? 0 : -EINVAL; @@ -223,6 +225,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, convert_to_efi_time(&wtime, &eft); + lock_kernel(); spin_lock_irqsave(&efi_rtc_lock, flags); /* * XXX Fixme: @@ -233,16 +236,19 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, status = efi.set_wakeup_time((efi_bool_t)enabled, &eft); spin_unlock_irqrestore(&efi_rtc_lock,flags); + unlock_kernel(); return status == EFI_SUCCESS ? 0 : -EINVAL; case RTC_WKALM_RD: + lock_kernel(); spin_lock_irqsave(&efi_rtc_lock, flags); status = efi.get_wakeup_time((efi_bool_t *)&enabled, (efi_bool_t *)&pending, &eft); spin_unlock_irqrestore(&efi_rtc_lock,flags); + unlock_kernel(); if (status != EFI_SUCCESS) return -EINVAL; @@ -256,7 +262,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, return copy_to_user(&ewp->time, &wtime, sizeof(struct rtc_time)) ? -EFAULT : 0; } - return -EINVAL; + return -ENOTTY; } /* @@ -265,8 +271,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, * up things on a close. */ -static int -efi_rtc_open(struct inode *inode, struct file *file) +static int efi_rtc_open(struct inode *inode, struct file *file) { /* * nothing special to do here @@ -277,8 +282,7 @@ efi_rtc_open(struct inode *inode, struct file *file) return 0; } -static int -efi_rtc_close(struct inode *inode, struct file *file) +static int efi_rtc_close(struct inode *inode, struct file *file) { return 0; } @@ -289,13 +293,12 @@ efi_rtc_close(struct inode *inode, struct file *file) static const struct file_operations efi_rtc_fops = { .owner = THIS_MODULE, - .ioctl = efi_rtc_ioctl, + .unlocked_ioctl = efi_rtc_ioctl, .open = efi_rtc_open, .release = efi_rtc_close, }; -static struct miscdevice efi_rtc_dev= -{ +static struct miscdevice efi_rtc_dev= { EFI_RTC_MINOR, "efirtc", &efi_rtc_fops diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index fb0a85a1eb36..b3f5dbc6d880 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -623,6 +623,7 @@ static inline int hpet_tpcheck(struct hpet_task *tp) return -ENXIO; } +#if 0 int hpet_unregister(struct hpet_task *tp) { struct hpet_dev *devp; @@ -652,6 +653,7 @@ int hpet_unregister(struct hpet_task *tp) return 0; } +#endif /* 0 */ static ctl_table hpet_table[] = { { diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 2f9759d625cc..02aac104842d 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -27,7 +27,6 @@ #include <linux/init.h> #include <linux/kbd_kern.h> #include <linux/kernel.h> -#include <linux/kref.h> #include <linux/kthread.h> #include <linux/list.h> #include <linux/module.h> @@ -75,23 +74,6 @@ static int hvc_init(void); static int sysrq_pressed; #endif -struct hvc_struct { - spinlock_t lock; - int index; - struct tty_struct *tty; - unsigned int count; - int do_wakeup; - char *outbuf; - int outbuf_size; - int n_outbuf; - uint32_t vtermno; - struct hv_ops *ops; - int irq_requested; - int irq; - struct list_head next; - struct kref kref; /* ref count & hvc_struct lifetime */ -}; - /* dynamic list of hvc_struct instances */ static LIST_HEAD(hvc_structs); @@ -298,27 +280,15 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops) return 0; } +EXPORT_SYMBOL_GPL(hvc_instantiate); /* Wake the sleeping khvcd */ -static void hvc_kick(void) +void hvc_kick(void) { hvc_kicked = 1; wake_up_process(hvc_task); } - -static int hvc_poll(struct hvc_struct *hp); - -/* - * NOTE: This API isn't used if the console adapter doesn't support interrupts. - * In this case the console is poll driven. - */ -static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) -{ - /* if hvc_poll request a repoll, then kick the hvcd thread */ - if (hvc_poll(dev_instance)) - hvc_kick(); - return IRQ_HANDLED; -} +EXPORT_SYMBOL_GPL(hvc_kick); static void hvc_unthrottle(struct tty_struct *tty) { @@ -333,7 +303,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; unsigned long flags; - int irq = 0; int rc = 0; /* Auto increments kref reference if found. */ @@ -352,18 +321,15 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */ hp->tty = tty; - /* Save for request_irq outside of spin_lock. */ - irq = hp->irq; - if (irq) - hp->irq_requested = 1; + + if (hp->ops->notifier_add) + rc = hp->ops->notifier_add(hp, hp->data); spin_unlock_irqrestore(&hp->lock, flags); - /* check error, fallback to non-irq */ - if (irq) - rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp); + /* - * If the request_irq() fails and we return an error. The tty layer + * If the notifier fails we return an error. The tty layer * will call hvc_close() after a failed open but we don't want to clean * up there so we'll clean up here and clear out the previously set * tty fields and return the kref reference. @@ -371,7 +337,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) if (rc) { spin_lock_irqsave(&hp->lock, flags); hp->tty = NULL; - hp->irq_requested = 0; spin_unlock_irqrestore(&hp->lock, flags); tty->driver_data = NULL; kref_put(&hp->kref, destroy_hvc_struct); @@ -386,7 +351,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) static void hvc_close(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; - int irq = 0; unsigned long flags; if (tty_hung_up_p(filp)) @@ -404,9 +368,8 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) spin_lock_irqsave(&hp->lock, flags); if (--hp->count == 0) { - if (hp->irq_requested) - irq = hp->irq; - hp->irq_requested = 0; + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); /* We are done with the tty pointer now. */ hp->tty = NULL; @@ -418,10 +381,6 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) * waking periodically to check chars_in_buffer(). */ tty_wait_until_sent(tty, HVC_CLOSE_WAIT); - - if (irq) - free_irq(irq, hp); - } else { if (hp->count < 0) printk(KERN_ERR "hvc_close %X: oops, count is %d\n", @@ -436,7 +395,6 @@ static void hvc_hangup(struct tty_struct *tty) { struct hvc_struct *hp = tty->driver_data; unsigned long flags; - int irq = 0; int temp_open_count; if (!hp) @@ -458,13 +416,12 @@ static void hvc_hangup(struct tty_struct *tty) hp->count = 0; hp->n_outbuf = 0; hp->tty = NULL; - if (hp->irq_requested) - /* Saved for use outside of spin_lock. */ - irq = hp->irq; - hp->irq_requested = 0; + + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); + spin_unlock_irqrestore(&hp->lock, flags); - if (irq) - free_irq(irq, hp); + while(temp_open_count) { --temp_open_count; kref_put(&hp->kref, destroy_hvc_struct); @@ -575,7 +532,7 @@ static u32 timeout = MIN_TIMEOUT; #define HVC_POLL_READ 0x00000001 #define HVC_POLL_WRITE 0x00000002 -static int hvc_poll(struct hvc_struct *hp) +int hvc_poll(struct hvc_struct *hp) { struct tty_struct *tty; int i, n, poll_mask = 0; @@ -602,10 +559,10 @@ static int hvc_poll(struct hvc_struct *hp) if (test_bit(TTY_THROTTLED, &tty->flags)) goto throttled; - /* If we aren't interrupt driven and aren't throttled, we always + /* If we aren't notifier driven and aren't throttled, we always * request a reschedule */ - if (hp->irq == 0) + if (!hp->irq_requested) poll_mask |= HVC_POLL_READ; /* Read data if any */ @@ -674,6 +631,7 @@ static int hvc_poll(struct hvc_struct *hp) return poll_mask; } +EXPORT_SYMBOL_GPL(hvc_poll); /* * This kthread is either polling or interrupt driven. This is determined by @@ -733,7 +691,7 @@ static const struct tty_operations hvc_ops = { .chars_in_buffer = hvc_chars_in_buffer, }; -struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, +struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size) { struct hvc_struct *hp; @@ -754,7 +712,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, memset(hp, 0x00, sizeof(*hp)); hp->vtermno = vtermno; - hp->irq = irq; + hp->data = data; hp->ops = ops; hp->outbuf_size = outbuf_size; hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))]; @@ -784,6 +742,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, return hp; } +EXPORT_SYMBOL_GPL(hvc_alloc); int __devexit hvc_remove(struct hvc_struct *hp) { diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 42ffb17e15df..d9ce10915625 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -26,6 +26,7 @@ #ifndef HVC_CONSOLE_H #define HVC_CONSOLE_H +#include <linux/kref.h> /* * This is the max number of console adapters that can/will be found as @@ -42,24 +43,50 @@ */ #define HVC_ALLOC_TTY_ADAPTERS 8 +struct hvc_struct { + spinlock_t lock; + int index; + struct tty_struct *tty; + unsigned int count; + int do_wakeup; + char *outbuf; + int outbuf_size; + int n_outbuf; + uint32_t vtermno; + struct hv_ops *ops; + int irq_requested; + int data; + struct list_head next; + struct kref kref; /* ref count & hvc_struct lifetime */ +}; /* implemented by a low level driver */ struct hv_ops { int (*get_chars)(uint32_t vtermno, char *buf, int count); int (*put_chars)(uint32_t vtermno, const char *buf, int count); -}; -struct hvc_struct; + /* Callbacks for notification. Called in open and close */ + int (*notifier_add)(struct hvc_struct *hp, int irq); + void (*notifier_del)(struct hvc_struct *hp, int irq); +}; /* Register a vterm and a slot index for use as a console (console_init) */ extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops); /* register a vterm for hvc tty operation (module_init or hotplug add) */ -extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq, +extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size); -/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */ +/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */ extern int __devexit hvc_remove(struct hvc_struct *hp); +/* data available */ +int hvc_poll(struct hvc_struct *hp); +void hvc_kick(void); + +/* default notifier for irq based notification */ +extern int notifier_add_irq(struct hvc_struct *hp, int data); +extern void notifier_del_irq(struct hvc_struct *hp, int data); + #if defined(CONFIG_XMON) && defined(CONFIG_SMP) #include <asm/xmon.h> diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c new file mode 100644 index 000000000000..73a59cdb8947 --- /dev/null +++ b/drivers/char/hvc_irq.c @@ -0,0 +1,44 @@ +/* + * Copyright IBM Corp. 2001,2008 + * + * This file contains the IRQ specific code for hvc_console + * + */ + +#include <linux/interrupt.h> + +#include "hvc_console.h" + +static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) +{ + /* if hvc_poll request a repoll, then kick the hvcd thread */ + if (hvc_poll(dev_instance)) + hvc_kick(); + return IRQ_HANDLED; +} + +/* + * For IRQ based systems these callbacks can be used + */ +int notifier_add_irq(struct hvc_struct *hp, int irq) +{ + int rc; + + if (!irq) { + hp->irq_requested = 0; + return 0; + } + rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, + "hvc_console", hp); + if (!rc) + hp->irq_requested = 1; + return rc; +} + +void notifier_del_irq(struct hvc_struct *hp, int irq) +{ + if (!irq) + return; + free_irq(irq, hp); + hp->irq_requested = 0; +} diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index a08f8f981c11..b71c610fe5ae 100644 --- a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -200,6 +200,8 @@ done: static struct hv_ops hvc_get_put_ops = { .get_chars = get_chars, .put_chars = put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 79711aa4b41d..93f3840c1682 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -80,6 +80,8 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count) static struct hv_ops hvc_get_put_ops = { .get_chars = filtered_get_chars, .put_chars = hvc_put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index db2ae4216279..6b70aa66a587 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -100,6 +100,8 @@ static int read_console(uint32_t vtermno, char *buf, int len) static struct hv_ops hvc_ops = { .get_chars = read_console, .put_chars = write_console, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __init xen_init(void) diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index 9cb48fcd316c..689f9dcd3b86 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -203,7 +203,7 @@ static int set_serial_info(i2ChanStrPtr, struct serial_struct __user *); static ssize_t ip2_ipl_read(struct file *, char __user *, size_t, loff_t *); static ssize_t ip2_ipl_write(struct file *, const char __user *, size_t, loff_t *); -static int ip2_ipl_ioctl(struct inode *, struct file *, UINT, ULONG); +static long ip2_ipl_ioctl(struct file *, UINT, ULONG); static int ip2_ipl_open(struct inode *, struct file *); static int DumpTraceBuffer(char __user *, int); @@ -236,7 +236,7 @@ static const struct file_operations ip2_ipl = { .owner = THIS_MODULE, .read = ip2_ipl_read, .write = ip2_ipl_write, - .ioctl = ip2_ipl_ioctl, + .unlocked_ioctl = ip2_ipl_ioctl, .open = ip2_ipl_open, }; @@ -2845,10 +2845,10 @@ ip2_ipl_write(struct file *pFile, const char __user *pData, size_t count, loff_t /* */ /* */ /******************************************************************************/ -static int -ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg ) +static long +ip2_ipl_ioctl (struct file *pFile, UINT cmd, ULONG arg ) { - unsigned int iplminor = iminor(pInode); + unsigned int iplminor = iminor(pFile->f_path.dentry->d_inode); int rc = 0; void __user *argp = (void __user *)arg; ULONG __user *pIndex = argp; @@ -2859,6 +2859,8 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg ) printk (KERN_DEBUG "IP2IPL: ioctl cmd %d, arg %ld\n", cmd, arg ); #endif + lock_kernel(); + switch ( iplminor ) { case 0: // IPL device rc = -EINVAL; @@ -2919,6 +2921,7 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg ) rc = -ENODEV; break; } + unlock_kernel(); return rc; } diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index 50243fcd87e8..4f8d67fed292 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -86,8 +86,8 @@ module_param(mwave_uart_io, int, 0); static int mwave_open(struct inode *inode, struct file *file); static int mwave_close(struct inode *inode, struct file *file); -static int mwave_ioctl(struct inode *inode, struct file *filp, - unsigned int iocmd, unsigned long ioarg); +static long mwave_ioctl(struct file *filp, unsigned int iocmd, + unsigned long ioarg); MWAVE_DEVICE_DATA mwave_s_mdd; @@ -119,16 +119,16 @@ static int mwave_close(struct inode *inode, struct file *file) return retval; } -static int mwave_ioctl(struct inode *inode, struct file *file, - unsigned int iocmd, unsigned long ioarg) +static long mwave_ioctl(struct file *file, unsigned int iocmd, + unsigned long ioarg) { unsigned int retval = 0; pMWAVE_DEVICE_DATA pDrvData = &mwave_s_mdd; void __user *arg = (void __user *)ioarg; - PRINTK_5(TRACE_MWAVE, - "mwavedd::mwave_ioctl, entry inode %p file %p cmd %x arg %x\n", - inode, file, iocmd, (int) ioarg); + PRINTK_4(TRACE_MWAVE, + "mwavedd::mwave_ioctl, entry file %p cmd %x arg %x\n", + file, iocmd, (int) ioarg); switch (iocmd) { @@ -136,7 +136,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file, PRINTK_1(TRACE_MWAVE, "mwavedd::mwave_ioctl, IOCTL_MW_RESET" " calling tp3780I_ResetDSP\n"); + lock_kernel(); retval = tp3780I_ResetDSP(&pDrvData->rBDData); + unlock_kernel(); PRINTK_2(TRACE_MWAVE, "mwavedd::mwave_ioctl, IOCTL_MW_RESET" " retval %x from tp3780I_ResetDSP\n", @@ -147,7 +149,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file, PRINTK_1(TRACE_MWAVE, "mwavedd::mwave_ioctl, IOCTL_MW_RUN" " calling tp3780I_StartDSP\n"); + lock_kernel(); retval = tp3780I_StartDSP(&pDrvData->rBDData); + unlock_kernel(); PRINTK_2(TRACE_MWAVE, "mwavedd::mwave_ioctl, IOCTL_MW_RUN" " retval %x from tp3780I_StartDSP\n", @@ -161,8 +165,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file, "mwavedd::mwave_ioctl," " IOCTL_MW_DSP_ABILITIES calling" " tp3780I_QueryAbilities\n"); + lock_kernel(); retval = tp3780I_QueryAbilities(&pDrvData->rBDData, &rAbilities); + unlock_kernel(); PRINTK_2(TRACE_MWAVE, "mwavedd::mwave_ioctl, IOCTL_MW_DSP_ABILITIES" " retval %x from tp3780I_QueryAbilities\n", @@ -193,11 +199,13 @@ static int mwave_ioctl(struct inode *inode, struct file *file, "mwavedd::mwave_ioctl IOCTL_MW_READ_DATA," " size %lx, ioarg %lx pusBuffer %p\n", rReadData.ulDataLength, ioarg, pusBuffer); + lock_kernel(); retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData, iocmd, pusBuffer, rReadData.ulDataLength, rReadData.usDspAddress); + unlock_kernel(); } break; @@ -215,10 +223,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file, " size %lx, ioarg %lx pusBuffer %p\n", rReadData.ulDataLength / 2, ioarg, pusBuffer); + lock_kernel(); retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData, iocmd, pusBuffer, rReadData.ulDataLength / 2, rReadData.usDspAddress); + unlock_kernel(); } break; @@ -236,10 +246,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file, " size %lx, ioarg %lx pusBuffer %p\n", rWriteData.ulDataLength, ioarg, pusBuffer); + lock_kernel(); retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData, iocmd, pusBuffer, rWriteData.ulDataLength, rWriteData.usDspAddress); + unlock_kernel(); } break; @@ -257,10 +269,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file, " size %lx, ioarg %lx pusBuffer %p\n", rWriteData.ulDataLength, ioarg, pusBuffer); + lock_kernel(); retval = tp3780I_ReadWriteDspIStore(&pDrvData->rBDData, iocmd, pusBuffer, rWriteData.ulDataLength, rWriteData.usDspAddress); + unlock_kernel(); } break; @@ -281,8 +295,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file, ipcnum); return -EINVAL; } + lock_kernel(); pDrvData->IPCs[ipcnum].bIsHere = FALSE; pDrvData->IPCs[ipcnum].bIsEnabled = TRUE; + unlock_kernel(); PRINTK_2(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" @@ -307,6 +323,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file, return -EINVAL; } + lock_kernel(); if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { DECLARE_WAITQUEUE(wait, current); @@ -347,6 +364,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file, " processing\n", ipcnum); } + unlock_kernel(); } break; @@ -365,19 +383,18 @@ static int mwave_ioctl(struct inode *inode, struct file *file, ipcnum); return -EINVAL; } + lock_kernel(); if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { pDrvData->IPCs[ipcnum].bIsEnabled = FALSE; if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) { wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue); } } + unlock_kernel(); } break; default: - PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:" - " Error: Unrecognized iocmd %x\n", - iocmd); return -ENOTTY; break; } /* switch */ @@ -460,7 +477,7 @@ static const struct file_operations mwave_fops = { .owner = THIS_MODULE, .read = mwave_read, .write = mwave_write, - .ioctl = mwave_ioctl, + .unlocked_ioctl = mwave_ioctl, .open = mwave_open, .release = mwave_close }; diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h index 8eca61e0a19c..7e0d530e2e07 100644 --- a/drivers/char/mwave/mwavedd.h +++ b/drivers/char/mwave/mwavedd.h @@ -147,4 +147,6 @@ typedef struct _MWAVE_DEVICE_DATA { } MWAVE_DEVICE_DATA, *pMWAVE_DEVICE_DATA; +extern MWAVE_DEVICE_DATA mwave_s_mdd; + #endif diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c index f282976daaac..c68969708068 100644 --- a/drivers/char/mwave/tp3780i.c +++ b/drivers/char/mwave/tp3780i.c @@ -57,8 +57,6 @@ #include "3780i.h" #include "mwavepub.h" -extern MWAVE_DEVICE_DATA mwave_s_mdd; - static unsigned short s_ausThinkpadIrqToField[16] = { 0xFFFF, 0xFFFF, 0xFFFF, 0x0001, 0x0002, 0x0003, 0xFFFF, 0x0004, 0xFFFF, 0xFFFF, 0x0005, 0x0006, 0xFFFF, 0xFFFF, 0xFFFF, 0x0007 }; diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 4c756bbba948..e30575e87648 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -16,7 +16,6 @@ * Fed through a cleanup, indent and remove of non 2.6 code by Alan Cox * <alan@redhat.com>. The original 1.8 code is available on www.moxa.com. * - Fixed x86_64 cleanness - * - Fixed sleep with spinlock held in mxser_send_break */ #include <linux/module.h> @@ -49,18 +48,12 @@ #define MXSER_VERSION "2.0.4" /* 1.12 */ #define MXSERMAJOR 174 -#define MXSERCUMAJOR 175 #define MXSER_BOARDS 4 /* Max. boards */ #define MXSER_PORTS_PER_BOARD 8 /* Max. ports per board */ #define MXSER_PORTS (MXSER_BOARDS * MXSER_PORTS_PER_BOARD) #define MXSER_ISR_PASS_LIMIT 100 -#define MXSER_ERR_IOADDR -1 -#define MXSER_ERR_IRQ -2 -#define MXSER_ERR_IRQ_CONFLIT -3 -#define MXSER_ERR_VECTOR -4 - /*CheckIsMoxaMust return value*/ #define MOXA_OTHER_UART 0x00 #define MOXA_MUST_MU150_HWID 0x01 @@ -179,14 +172,15 @@ static struct pci_device_id mxser_pcibrds[] = { }; MODULE_DEVICE_TABLE(pci, mxser_pcibrds); -static int ioaddr[MXSER_BOARDS] = { 0, 0, 0, 0 }; +static unsigned long ioaddr[MXSER_BOARDS]; static int ttymajor = MXSERMAJOR; /* Variables for insmod */ MODULE_AUTHOR("Casper Yang"); MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver"); -module_param_array(ioaddr, int, NULL, 0); +module_param_array(ioaddr, ulong, NULL, 0); +MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board"); module_param(ttymajor, int, 0); MODULE_LICENSE("GPL"); @@ -196,7 +190,6 @@ struct mxser_log { unsigned long txcnt[MXSER_PORTS]; }; - struct mxser_mon { unsigned long rxcnt; unsigned long txcnt; @@ -287,19 +280,9 @@ struct mxser_mstatus { int dcd; }; -static struct mxser_mstatus GMStatus[MXSER_PORTS]; - -static int mxserBoardCAP[MXSER_BOARDS] = { - 0, 0, 0, 0 - /* 0x180, 0x280, 0x200, 0x320 */ -}; - static struct mxser_board mxser_boards[MXSER_BOARDS]; static struct tty_driver *mxvar_sdriver; static struct mxser_log mxvar_log; -static int mxvar_diagflag; -static unsigned char mxser_msr[MXSER_PORTS + 1]; -static struct mxser_mon_ext mon_data_ext; static int mxser_set_baud_method[MXSER_PORTS + 1]; static void mxser_enable_must_enchance_mode(unsigned long baseio) @@ -543,6 +526,7 @@ static void process_txrx_fifo(struct mxser_port *info) static unsigned char mxser_get_msr(int baseaddr, int mode, int port) { + static unsigned char mxser_msr[MXSER_PORTS + 1]; unsigned char status = 0; status = inb(baseaddr + UART_MSR); @@ -1319,13 +1303,9 @@ static void mxser_flush_chars(struct tty_struct *tty) struct mxser_port *info = tty->driver_data; unsigned long flags; - if (info->xmit_cnt <= 0 || - tty->stopped || - !info->port.xmit_buf || - (tty->hw_stopped && - (info->type != PORT_16550A) && - (!info->board->chip_flag) - )) + if (info->xmit_cnt <= 0 || tty->stopped || !info->port.xmit_buf || + (tty->hw_stopped && info->type != PORT_16550A && + !info->board->chip_flag)) return; spin_lock_irqsave(&info->slock, flags); @@ -1343,9 +1323,7 @@ static int mxser_write_room(struct tty_struct *tty) int ret; ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1; - if (ret < 0) - ret = 0; - return ret; + return ret < 0 ? 0 : ret; } static int mxser_chars_in_buffer(struct tty_struct *tty) @@ -1634,6 +1612,8 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp) switch (cmd) { case MOXA_GET_MAJOR: + printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix " + "your userspace\n", current->comm, cmd); return put_user(ttymajor, (int __user *)argp); case MOXA_CHKPORTENABLE: @@ -1651,62 +1631,60 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp) ret = -EFAULT; unlock_kernel(); return ret; - case MOXA_GETMSTATUS: + case MOXA_GETMSTATUS: { + struct mxser_mstatus ms, __user *msu = argp; lock_kernel(); for (i = 0; i < MXSER_BOARDS; i++) for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) { port = &mxser_boards[i].ports[j]; + memset(&ms, 0, sizeof(ms)); - GMStatus[i].ri = 0; - if (!port->ioaddr) { - GMStatus[i].dcd = 0; - GMStatus[i].dsr = 0; - GMStatus[i].cts = 0; - continue; - } + if (!port->ioaddr) + goto copy; if (!port->port.tty || !port->port.tty->termios) - GMStatus[i].cflag = - port->normal_termios.c_cflag; + ms.cflag = port->normal_termios.c_cflag; else - GMStatus[i].cflag = - port->port.tty->termios->c_cflag; + ms.cflag = port->port.tty->termios->c_cflag; status = inb(port->ioaddr + UART_MSR); - if (status & 0x80 /*UART_MSR_DCD */ ) - GMStatus[i].dcd = 1; - else - GMStatus[i].dcd = 0; - - if (status & 0x20 /*UART_MSR_DSR */ ) - GMStatus[i].dsr = 1; - else - GMStatus[i].dsr = 0; - - - if (status & 0x10 /*UART_MSR_CTS */ ) - GMStatus[i].cts = 1; - else - GMStatus[i].cts = 0; + if (status & UART_MSR_DCD) + ms.dcd = 1; + if (status & UART_MSR_DSR) + ms.dsr = 1; + if (status & UART_MSR_CTS) + ms.cts = 1; + copy: + if (copy_to_user(msu, &ms, sizeof(ms))) { + unlock_kernel(); + return -EFAULT; + } + msu++; } unlock_kernel(); - if (copy_to_user(argp, GMStatus, - sizeof(struct mxser_mstatus) * MXSER_PORTS)) - return -EFAULT; return 0; + } case MOXA_ASPP_MON_EXT: { - int p, shiftbit; - unsigned long opmode; - unsigned cflag, iflag; + struct mxser_mon_ext *me; /* it's 2k, stack unfriendly */ + unsigned int cflag, iflag, p; + u8 opmode; + + me = kzalloc(sizeof(*me), GFP_KERNEL); + if (!me) + return -ENOMEM; lock_kernel(); - for (i = 0; i < MXSER_BOARDS; i++) { - for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) { + for (i = 0, p = 0; i < MXSER_BOARDS; i++) { + for (j = 0; j < MXSER_PORTS_PER_BOARD; j++, p++) { + if (p >= ARRAY_SIZE(me->rx_cnt)) { + i = MXSER_BOARDS; + break; + } port = &mxser_boards[i].ports[j]; if (!port->ioaddr) continue; - status = mxser_get_msr(port->ioaddr, 0, i); + status = mxser_get_msr(port->ioaddr, 0, p); if (status & UART_MSR_TERI) port->icount.rng++; @@ -1718,16 +1696,13 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp) port->icount.cts++; port->mon_data.modem_status = status; - mon_data_ext.rx_cnt[i] = port->mon_data.rxcnt; - mon_data_ext.tx_cnt[i] = port->mon_data.txcnt; - mon_data_ext.up_rxcnt[i] = - port->mon_data.up_rxcnt; - mon_data_ext.up_txcnt[i] = - port->mon_data.up_txcnt; - mon_data_ext.modem_status[i] = + me->rx_cnt[p] = port->mon_data.rxcnt; + me->tx_cnt[p] = port->mon_data.txcnt; + me->up_rxcnt[p] = port->mon_data.up_rxcnt; + me->up_txcnt[p] = port->mon_data.up_txcnt; + me->modem_status[p] = port->mon_data.modem_status; - mon_data_ext.baudrate[i] = - tty_get_baud_rate(port->port.tty); + me->baudrate[p] = tty_get_baud_rate(port->port.tty); if (!port->port.tty || !port->port.tty->termios) { cflag = port->normal_termios.c_cflag; @@ -1737,40 +1712,31 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp) iflag = port->port.tty->termios->c_iflag; } - mon_data_ext.databits[i] = cflag & CSIZE; - - mon_data_ext.stopbits[i] = cflag & CSTOPB; - - mon_data_ext.parity[i] = - cflag & (PARENB | PARODD | CMSPAR); - - mon_data_ext.flowctrl[i] = 0x00; + me->databits[p] = cflag & CSIZE; + me->stopbits[p] = cflag & CSTOPB; + me->parity[p] = cflag & (PARENB | PARODD | + CMSPAR); if (cflag & CRTSCTS) - mon_data_ext.flowctrl[i] |= 0x03; + me->flowctrl[p] |= 0x03; if (iflag & (IXON | IXOFF)) - mon_data_ext.flowctrl[i] |= 0x0C; + me->flowctrl[p] |= 0x0C; if (port->type == PORT_16550A) - mon_data_ext.fifo[i] = 1; - else - mon_data_ext.fifo[i] = 0; + me->fifo[p] = 1; - p = i % 4; - shiftbit = p * 2; - opmode = inb(port->opmode_ioaddr) >> shiftbit; + opmode = inb(port->opmode_ioaddr) >> + ((p % 4) * 2); opmode &= OP_MODE_MASK; - - mon_data_ext.iftype[i] = opmode; - + me->iftype[p] = opmode; } } unlock_kernel(); - if (copy_to_user(argp, &mon_data_ext, - sizeof(mon_data_ext))) - return -EFAULT; - return 0; + if (copy_to_user(argp, me, sizeof(*me))) + ret = -EFAULT; + kfree(me); + return ret; } default: return -ENOIOCTLCMD; @@ -1804,7 +1770,6 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file, { struct mxser_port *info = tty->driver_data; struct async_icount cnow; - struct serial_icounter_struct __user *p_cuser; unsigned long flags; void __user *argp = (void __user *)arg; int retval; @@ -1884,30 +1849,26 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file, * NB: both 1->0 and 0->1 transitions are counted except for * RI where only 0->1 is counted. */ - case TIOCGICOUNT: + case TIOCGICOUNT: { + struct serial_icounter_struct icnt = { 0 }; spin_lock_irqsave(&info->slock, flags); cnow = info->icount; spin_unlock_irqrestore(&info->slock, flags); - p_cuser = argp; - if (put_user(cnow.frame, &p_cuser->frame)) - return -EFAULT; - if (put_user(cnow.brk, &p_cuser->brk)) - return -EFAULT; - if (put_user(cnow.overrun, &p_cuser->overrun)) - return -EFAULT; - if (put_user(cnow.buf_overrun, &p_cuser->buf_overrun)) - return -EFAULT; - if (put_user(cnow.parity, &p_cuser->parity)) - return -EFAULT; - if (put_user(cnow.rx, &p_cuser->rx)) - return -EFAULT; - if (put_user(cnow.tx, &p_cuser->tx)) - return -EFAULT; - put_user(cnow.cts, &p_cuser->cts); - put_user(cnow.dsr, &p_cuser->dsr); - put_user(cnow.rng, &p_cuser->rng); - put_user(cnow.dcd, &p_cuser->dcd); - return 0; + + icnt.frame = cnow.frame; + icnt.brk = cnow.brk; + icnt.overrun = cnow.overrun; + icnt.buf_overrun = cnow.buf_overrun; + icnt.parity = cnow.parity; + icnt.rx = cnow.rx; + icnt.tx = cnow.tx; + icnt.cts = cnow.cts; + icnt.dsr = cnow.dsr; + icnt.rng = cnow.rng; + icnt.dcd = cnow.dcd; + + return copy_to_user(argp, &icnt, sizeof(icnt)) ? -EFAULT : 0; + } case MOXA_HighSpeedOn: return put_user(info->baud_base != 115200 ? 1 : 0, (int __user *)argp); case MOXA_SDS_RSTICOUNTER: @@ -2503,7 +2464,8 @@ static int __devinit mxser_initbrd(struct mxser_board *brd, unsigned int i; int retval; - printk(KERN_INFO "max. baud rate = %d bps.\n", brd->ports[0].max_baud); + printk(KERN_INFO "mxser: max. baud rate = %d bps\n", + brd->ports[0].max_baud); for (i = 0; i < brd->info->nports; i++) { info = &brd->ports[i]; @@ -2586,28 +2548,32 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd) irq = regs[9] & 0xF000; irq = irq | (irq >> 4); if (irq != (regs[9] & 0xFF00)) - return MXSER_ERR_IRQ_CONFLIT; + goto err_irqconflict; } else if (brd->info->nports == 4) { irq = regs[9] & 0xF000; irq = irq | (irq >> 4); irq = irq | (irq >> 8); if (irq != regs[9]) - return MXSER_ERR_IRQ_CONFLIT; + goto err_irqconflict; } else if (brd->info->nports == 8) { irq = regs[9] & 0xF000; irq = irq | (irq >> 4); irq = irq | (irq >> 8); if ((irq != regs[9]) || (irq != regs[10])) - return MXSER_ERR_IRQ_CONFLIT; + goto err_irqconflict; } - if (!irq) - return MXSER_ERR_IRQ; + if (!irq) { + printk(KERN_ERR "mxser: interrupt number unset\n"); + return -EIO; + } brd->irq = ((int)(irq & 0xF000) >> 12); for (i = 0; i < 8; i++) brd->ports[i].ioaddr = (int) regs[i + 1] & 0xFFF8; - if ((regs[12] & 0x80) == 0) - return MXSER_ERR_VECTOR; + if ((regs[12] & 0x80) == 0) { + printk(KERN_ERR "mxser: invalid interrupt vector\n"); + return -EIO; + } brd->vector = (int)regs[11]; /* interrupt vector */ if (id == 1) brd->vector_mask = 0x00FF; @@ -2634,13 +2600,26 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd) else brd->uart_type = PORT_16450; if (!request_region(brd->ports[0].ioaddr, 8 * brd->info->nports, - "mxser(IO)")) - return MXSER_ERR_IOADDR; + "mxser(IO)")) { + printk(KERN_ERR "mxser: can't request ports I/O region: " + "0x%.8lx-0x%.8lx\n", + brd->ports[0].ioaddr, brd->ports[0].ioaddr + + 8 * brd->info->nports - 1); + return -EIO; + } if (!request_region(brd->vector, 1, "mxser(vector)")) { release_region(brd->ports[0].ioaddr, 8 * brd->info->nports); - return MXSER_ERR_VECTOR; + printk(KERN_ERR "mxser: can't request interrupt vector region: " + "0x%.8lx-0x%.8lx\n", + brd->ports[0].ioaddr, brd->ports[0].ioaddr + + 8 * brd->info->nports - 1); + return -EIO; } return brd->info->nports; + +err_irqconflict: + printk(KERN_ERR "mxser: invalid interrupt number\n"); + return -EIO; } static int __devinit mxser_probe(struct pci_dev *pdev, @@ -2657,20 +2636,20 @@ static int __devinit mxser_probe(struct pci_dev *pdev, break; if (i >= MXSER_BOARDS) { - printk(KERN_ERR "Too many Smartio/Industio family boards found " - "(maximum %d), board not configured\n", MXSER_BOARDS); + dev_err(&pdev->dev, "too many boards found (maximum %d), board " + "not configured\n", MXSER_BOARDS); goto err; } brd = &mxser_boards[i]; brd->idx = i * MXSER_PORTS_PER_BOARD; - printk(KERN_INFO "Found MOXA %s board (BusNo=%d, DevNo=%d)\n", + dev_info(&pdev->dev, "found MOXA %s board (BusNo=%d, DevNo=%d)\n", mxser_cards[ent->driver_data].name, pdev->bus->number, PCI_SLOT(pdev->devfn)); retval = pci_enable_device(pdev); if (retval) { - printk(KERN_ERR "Moxa SmartI/O PCI enable fail !\n"); + dev_err(&pdev->dev, "PCI enable failed\n"); goto err; } @@ -2772,11 +2751,8 @@ static struct pci_driver mxser_driver = { static int __init mxser_module_init(void) { struct mxser_board *brd; - unsigned long cap; - unsigned int i, m, isaloop; - int retval, b; - - pr_debug("Loading module mxser ...\n"); + unsigned int b, i, m; + int retval; mxvar_sdriver = alloc_tty_driver(MXSER_PORTS + 1); if (!mxvar_sdriver) @@ -2806,74 +2782,43 @@ static int __init mxser_module_init(void) goto err_put; } - mxvar_diagflag = 0; - - m = 0; /* Start finding ISA boards here */ - for (isaloop = 0; isaloop < 2; isaloop++) - for (b = 0; b < MXSER_BOARDS && m < MXSER_BOARDS; b++) { - if (!isaloop) - cap = mxserBoardCAP[b]; /* predefined */ - else - cap = ioaddr[b]; /* module param */ - - if (!cap) - continue; + for (m = 0, b = 0; b < MXSER_BOARDS; b++) { + if (!ioaddr[b]) + continue; + + brd = &mxser_boards[m]; + retval = mxser_get_ISA_conf(!ioaddr[b], brd); + if (retval <= 0) { + brd->info = NULL; + continue; + } - brd = &mxser_boards[m]; - retval = mxser_get_ISA_conf(cap, brd); - - if (retval != 0) - printk(KERN_INFO "Found MOXA %s board " - "(CAP=0x%x)\n", - brd->info->name, ioaddr[b]); - - if (retval <= 0) { - if (retval == MXSER_ERR_IRQ) - printk(KERN_ERR "Invalid interrupt " - "number, board not " - "configured\n"); - else if (retval == MXSER_ERR_IRQ_CONFLIT) - printk(KERN_ERR "Invalid interrupt " - "number, board not " - "configured\n"); - else if (retval == MXSER_ERR_VECTOR) - printk(KERN_ERR "Invalid interrupt " - "vector, board not " - "configured\n"); - else if (retval == MXSER_ERR_IOADDR) - printk(KERN_ERR "Invalid I/O address, " - "board not configured\n"); - - brd->info = NULL; - continue; - } + printk(KERN_INFO "mxser: found MOXA %s board (CAP=0x%lx)\n", + brd->info->name, ioaddr[b]); - /* mxser_initbrd will hook ISR. */ - if (mxser_initbrd(brd, NULL) < 0) { - brd->info = NULL; - continue; - } + /* mxser_initbrd will hook ISR. */ + if (mxser_initbrd(brd, NULL) < 0) { + brd->info = NULL; + continue; + } - brd->idx = m * MXSER_PORTS_PER_BOARD; - for (i = 0; i < brd->info->nports; i++) - tty_register_device(mxvar_sdriver, brd->idx + i, - NULL); + brd->idx = m * MXSER_PORTS_PER_BOARD; + for (i = 0; i < brd->info->nports; i++) + tty_register_device(mxvar_sdriver, brd->idx + i, NULL); - m++; - } + m++; + } retval = pci_register_driver(&mxser_driver); if (retval) { - printk(KERN_ERR "Can't register pci driver\n"); + printk(KERN_ERR "mxser: can't register pci driver\n"); if (!m) { retval = -ENODEV; goto err_unr; } /* else: we have some ISA cards under control */ } - pr_debug("Done.\n"); - return 0; err_unr: tty_unregister_driver(mxvar_sdriver); @@ -2886,8 +2831,6 @@ static void __exit mxser_module_exit(void) { unsigned int i, j; - pr_debug("Unloading module mxser ...\n"); - pci_unregister_driver(&mxser_driver); for (i = 0; i < MXSER_BOARDS; i++) /* ISA remains */ @@ -2901,8 +2844,6 @@ static void __exit mxser_module_exit(void) for (i = 0; i < MXSER_BOARDS; i++) if (mxser_boards[i].info != NULL) mxser_release_res(&mxser_boards[i], NULL, 1); - - pr_debug("Done.\n"); } module_init(mxser_module_init); diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index ba012c2bdf7a..f9f72a211292 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -122,35 +122,20 @@ static int flash_ioctl(struct inode *inodep, struct file *filep, unsigned int cm static ssize_t flash_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - unsigned long p = *ppos; - unsigned int count = size; - int ret = 0; + ssize_t ret; if (flashdebug) printk(KERN_DEBUG "flash_read: flash_read: offset=0x%lX, " "buffer=%p, count=0x%X.\n", p, buf, count); + /* + * We now lock against reads and writes. --rmk + */ + if (mutex_lock_interruptible(&nwflash_mutex)) + return -ERESTARTSYS; - if (count) - ret = -ENXIO; - - if (p < gbFlashSize) { - if (count > gbFlashSize - p) - count = gbFlashSize - p; + ret = simple_read_from_buffer(buf, size, ppos, FLASH_BASE, gbFlashSize); + mutex_unlock(&nwflash_mutex); - /* - * We now lock against reads and writes. --rmk - */ - if (mutex_lock_interruptible(&nwflash_mutex)) - return -ERESTARTSYS; - - ret = copy_to_user(buf, (void *)(FLASH_BASE + p), count); - if (ret == 0) { - ret = count; - *ppos += count; - } else - ret = -EFAULT; - mutex_unlock(&nwflash_mutex); - } return ret; } diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 7af7a7e6b9c2..bee39fdfba73 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -67,7 +67,7 @@ #include <linux/major.h> #include <linux/ppdev.h> #include <linux/smp_lock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #define PP_VERSION "ppdev: user-space parallel port driver" #define CHRDEV "ppdev" @@ -328,10 +328,9 @@ static enum ieee1284_phase init_phase (int mode) return IEEE1284_PH_FWD_IDLE; } -static int pp_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - unsigned int minor = iminor(inode); + unsigned int minor = iminor(file->f_path.dentry->d_inode); struct pp_struct *pp = file->private_data; struct parport * port; void __user *argp = (void __user *)arg; @@ -634,6 +633,15 @@ static int pp_ioctl(struct inode *inode, struct file *file, return 0; } +static long pp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long ret; + lock_kernel(); + ret = pp_do_ioctl(file, cmd, arg); + unlock_kernel(); + return ret; +} + static int pp_open (struct inode * inode, struct file * file) { unsigned int minor = iminor(inode); @@ -745,7 +753,7 @@ static const struct file_operations pp_fops = { .read = pp_read, .write = pp_write, .poll = pp_poll, - .ioctl = pp_ioctl, + .unlocked_ioctl = pp_ioctl, .open = pp_open, .release = pp_release, }; diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index 0cdfee152916..a8f68a3f14dd 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -179,7 +179,7 @@ static int rio_set_real_termios(void *ptr); static void rio_hungup(void *ptr); static void rio_close(void *ptr); static int rio_chars_in_buffer(void *ptr); -static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); static int rio_init_drivers(void); static void my_hd(void *addr, int len); @@ -240,7 +240,7 @@ static struct real_driver rio_real_driver = { static const struct file_operations rio_fw_fops = { .owner = THIS_MODULE, - .ioctl = rio_fw_ioctl, + .unlocked_ioctl = rio_fw_ioctl, }; static struct miscdevice rio_fw_device = { @@ -560,13 +560,15 @@ static void rio_close(void *ptr) -static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) +static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int rc = 0; func_enter(); /* The "dev" argument isn't used. */ + lock_kernel(); rc = riocontrol(p, 0, cmd, arg, capable(CAP_SYS_ADMIN)); + unlock_kernel(); func_exit(); return rc; diff --git a/drivers/char/sx.c b/drivers/char/sx.c index 2162439bbe48..c385206f9db5 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -286,8 +286,8 @@ static void sx_close(void *ptr); static int sx_chars_in_buffer(void *ptr); static int sx_init_board(struct sx_board *board); static int sx_init_portstructs(int nboards, int nports); -static int sx_fw_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); +static long sx_fw_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg); static int sx_init_drivers(void); static struct tty_driver *sx_driver; @@ -396,7 +396,7 @@ static struct real_driver sx_real_driver = { static const struct file_operations sx_fw_fops = { .owner = THIS_MODULE, - .ioctl = sx_fw_ioctl, + .unlocked_ioctl = sx_fw_ioctl, }; static struct miscdevice sx_fw_device = { @@ -1686,10 +1686,10 @@ static int do_memtest_w(struct sx_board *board, int min, int max) } #endif -static int sx_fw_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static long sx_fw_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { - int rc = 0; + long rc = 0; int __user *descr = (int __user *)arg; int i; static struct sx_board *board = NULL; @@ -1699,13 +1699,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp, func_enter(); -#if 0 - /* Removed superuser check: Sysops can use the permissions on the device - file to restrict access. Recommendation: Root only. (root.root 600) */ - if (!capable(CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_RAWIO)) return -EPERM; - } -#endif + + lock_kernel(); sx_dprintk(SX_DEBUG_FIRMWARE, "IOCTL %x: %lx\n", cmd, arg); @@ -1720,19 +1717,23 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp, for (i = 0; i < SX_NBOARDS; i++) sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags); sx_dprintk(SX_DEBUG_FIRMWARE, "\n"); + unlock_kernel(); return -EIO; } switch (cmd) { case SXIO_SET_BOARD: sx_dprintk(SX_DEBUG_FIRMWARE, "set board to %ld\n", arg); + rc = -EIO; if (arg >= SX_NBOARDS) - return -EIO; + break; sx_dprintk(SX_DEBUG_FIRMWARE, "not out of range\n"); if (!(boards[arg].flags & SX_BOARD_PRESENT)) - return -EIO; + break; sx_dprintk(SX_DEBUG_FIRMWARE, ".. and present!\n"); board = &boards[arg]; + rc = 0; + /* FIXME: And this does ... nothing?? */ break; case SXIO_GET_TYPE: rc = -ENOENT; /* If we manage to miss one, return error. */ @@ -1746,7 +1747,7 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp, rc = SX_TYPE_SI; if (IS_EISA_BOARD(board)) rc = SX_TYPE_SI; - sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %d\n", rc); + sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %ld\n", rc); break; case SXIO_DO_RAMTEST: if (sx_initialized) /* Already initialized: better not ramtest the board. */ @@ -1760,19 +1761,26 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp, rc = do_memtest(board, 0, 0x7ff8); /* if (!rc) rc = do_memtest_w (board, 0, 0x7ff8); */ } - sx_dprintk(SX_DEBUG_FIRMWARE, "returning memtest result= %d\n", - rc); + sx_dprintk(SX_DEBUG_FIRMWARE, + "returning memtest result= %ld\n", rc); break; case SXIO_DOWNLOAD: - if (sx_initialized) /* Already initialized */ - return -EEXIST; - if (!sx_reset(board)) - return -EIO; + if (sx_initialized) {/* Already initialized */ + rc = -EEXIST; + break; + } + if (!sx_reset(board)) { + rc = -EIO; + break; + } sx_dprintk(SX_DEBUG_INIT, "reset the board...\n"); tmp = kmalloc(SX_CHUNK_SIZE, GFP_USER); - if (!tmp) - return -ENOMEM; + if (!tmp) { + rc = -ENOMEM; + break; + } + /* FIXME: check returns */ get_user(nbytes, descr++); get_user(offset, descr++); get_user(data, descr++); @@ -1782,7 +1790,8 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp, (i + SX_CHUNK_SIZE > nbytes) ? nbytes - i : SX_CHUNK_SIZE)) { kfree(tmp); - return -EFAULT; + rc = -EFAULT; + break; } memcpy_toio(board->base2 + offset + i, tmp, (i + SX_CHUNK_SIZE > nbytes) ? @@ -1798,13 +1807,17 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp, rc = sx_nports; break; case SXIO_INIT: - if (sx_initialized) /* Already initialized */ - return -EEXIST; + if (sx_initialized) { /* Already initialized */ + rc = -EEXIST; + break; + } /* This is not allowed until all boards are initialized... */ for (i = 0; i < SX_NBOARDS; i++) { if ((boards[i].flags & SX_BOARD_PRESENT) && - !(boards[i].flags & SX_BOARD_INITIALIZED)) - return -EIO; + !(boards[i].flags & SX_BOARD_INITIALIZED)) { + rc = -EIO; + break; + } } for (i = 0; i < SX_NBOARDS; i++) if (!(boards[i].flags & SX_BOARD_PRESENT)) @@ -1832,10 +1845,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp, rc = sx_nports; break; default: - printk(KERN_WARNING "Unknown ioctl on firmware device (%x).\n", - cmd); + rc = -ENOTTY; break; } + unlock_kernel(); func_exit(); return rc; } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 6f4d856df987..e1b46bc7e43c 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -3580,7 +3580,6 @@ void proc_clear_tty(struct task_struct *p) p->signal->tty = NULL; spin_unlock_irq(&p->sighand->siglock); } -EXPORT_SYMBOL(proc_clear_tty); /* Called under the sighand lock */ diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index dc17fe3a88bc..d0f4eb6fdb7f 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -46,6 +46,9 @@ static char *in, *inbuf; /* The operations for our console. */ static struct hv_ops virtio_cons; +/* The hvc device */ +static struct hvc_struct *hvc; + /*D:310 The put_chars() callback is pretty straightforward. * * We turn the characters into a scatter-gather list, add it to the output @@ -134,6 +137,27 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) return hvc_instantiate(0, 0, &virtio_cons); } +/* + * we support only one console, the hvc struct is a global var + * There is no need to do anything + */ +static int notifier_add_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 1; + return 0; +} + +static void notifier_del_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 0; +} + +static void hvc_handle_input(struct virtqueue *vq) +{ + if (hvc_poll(hvc)) + hvc_kick(); +} + /*D:370 Once we're further in boot, we get probed like any other virtio device. * At this stage we set up the output virtqueue. * @@ -144,7 +168,6 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) static int __devinit virtcons_probe(struct virtio_device *dev) { int err; - struct hvc_struct *hvc; vdev = dev; @@ -158,7 +181,7 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev->config->find_vq(vdev, 0, NULL); + in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; @@ -173,15 +196,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Start using the new console output. */ virtio_cons.get_chars = get_chars; virtio_cons.put_chars = put_chars; + virtio_cons.notifier_add = notifier_add_vio; + virtio_cons.notifier_del = notifier_del_vio; /* The first argument of hvc_alloc() is the virtual console number, so - * we use zero. The second argument is the interrupt number; we - * currently leave this as zero: it would be better not to use the - * hvc mechanism and fix this (FIXME!). + * we use zero. The second argument is the parameter for the + * notification mechanism (like irq number). We currently leave this + * as zero, virtqueues have implicit notifications. * * The third argument is a "struct hv_ops" containing the put_chars() - * and get_chars() pointers. The final argument is the output buffer - * size: we can do any size, so we put PAGE_SIZE here. */ + * get_chars(), notifier_add() and notifier_del() pointers. + * The final argument is the output buffer size: we can do any size, + * so we put PAGE_SIZE here. */ hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); if (IS_ERR(hvc)) { err = PTR_ERR(hvc); diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index 51966ccf4ea3..8bfee5fb7223 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -87,7 +87,6 @@ #include <linux/mutex.h> #include <linux/smp_lock.h> #include <linux/sysctl.h> -#include <linux/version.h> #include <linux/fs.h> #include <linux/cdev.h> #include <linux/platform_device.h> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 6e6c3c4aea6b..5a11e3cbcae2 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -123,6 +123,13 @@ config EDAC_I5000 Support for error detection and correction the Intel Greekcreek/Blackford chipsets. +config EDAC_I5100 + tristate "Intel San Clemente MCH" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + San Clemente MCH. + config EDAC_MPC85XX tristate "Freescale MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 83807731d4a9..e5e9104b5520 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -19,6 +19,7 @@ endif obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o +obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index c94a0eb492cb..facfdb1fa71c 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -28,6 +28,7 @@ #define E752X_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "e752x_edac" +static int report_non_memory_errors; static int force_function_unhide; static int sysbus_parity = -1; @@ -117,7 +118,7 @@ static struct edac_pci_ctl_info *e752x_pci; #define E752X_BUF_FERR 0x70 /* Memory buffer first error reg (8b) */ #define E752X_BUF_NERR 0x72 /* Memory buffer next error reg (8b) */ #define E752X_BUF_ERRMASK 0x74 /* Memory buffer error mask reg (8b) */ -#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI command reg (8b) */ +#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI cmd reg (8b) */ #define E752X_DRAM_FERR 0x80 /* DRAM first error register (16b) */ #define E752X_DRAM_NERR 0x82 /* DRAM next error register (16b) */ #define E752X_DRAM_ERRMASK 0x84 /* DRAM error mask register (8b) */ @@ -127,7 +128,7 @@ static struct edac_pci_ctl_info *e752x_pci; /* error address register (32b) */ /* * 31 Reserved - * 30:2 CE address (64 byte block 34:6) + * 30:2 CE address (64 byte block 34:6 * 1 Reserved * 0 HiLoCS */ @@ -147,11 +148,11 @@ static struct edac_pci_ctl_info *e752x_pci; * 1 Reserved * 0 HiLoCS */ -#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM first uncorrectable scrub memory */ +#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM 1st uncorrectable scrub mem */ /* error address register (32b) */ /* * 31 Reserved - * 30:2 CE address (64 byte block 34:6) + * 30:2 CE address (64 byte block 34:6 * 1 Reserved * 0 HiLoCS */ @@ -394,9 +395,12 @@ static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error, struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; error_1b = retry_add; - page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */ - row = pvt->mc_symmetric ? ((page >> 1) & 3) : /* chip select are bits 14 & 13 */ + page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */ + + /* chip select are bits 14 & 13 */ + row = pvt->mc_symmetric ? ((page >> 1) & 3) : edac_mc_find_csrow_by_page(mci, page); + e752x_mc_printk(mci, KERN_WARNING, "CE page 0x%lx, row %d : Memory read retry\n", (long unsigned int)page, row); @@ -422,12 +426,21 @@ static inline void process_threshold_ce(struct mem_ctl_info *mci, u16 error, } static char *global_message[11] = { - "PCI Express C1", "PCI Express C", "PCI Express B1", - "PCI Express B", "PCI Express A1", "PCI Express A", - "DMA Controler", "HUB or NS Interface", "System Bus", - "DRAM Controler", "Internal Buffer" + "PCI Express C1", + "PCI Express C", + "PCI Express B1", + "PCI Express B", + "PCI Express A1", + "PCI Express A", + "DMA Controller", + "HUB or NS Interface", + "System Bus", + "DRAM Controller", /* 9th entry */ + "Internal Buffer" }; +#define DRAM_ENTRY 9 + static char *fatal_message[2] = { "Non-Fatal ", "Fatal " }; static void do_global_error(int fatal, u32 errors) @@ -435,9 +448,16 @@ static void do_global_error(int fatal, u32 errors) int i; for (i = 0; i < 11; i++) { - if (errors & (1 << i)) - e752x_printk(KERN_WARNING, "%sError %s\n", - fatal_message[fatal], global_message[i]); + if (errors & (1 << i)) { + /* If the error is from DRAM Controller OR + * we are to report ALL errors, then + * report the error + */ + if ((i == DRAM_ENTRY) || report_non_memory_errors) + e752x_printk(KERN_WARNING, "%sError %s\n", + fatal_message[fatal], + global_message[i]); + } } } @@ -1021,7 +1041,7 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, struct pci_dev *dev; pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, pvt->bridge_ck); + pvt->dev_info->err_dev, pvt->bridge_ck); if (pvt->bridge_ck == NULL) pvt->bridge_ck = pci_scan_single_device(pdev->bus, @@ -1034,8 +1054,9 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, return 1; } - dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev, - NULL); + dev = pci_get_device(PCI_VENDOR_ID_INTEL, + e752x_devs[dev_idx].ctl_dev, + NULL); if (dev == NULL) goto fail; @@ -1316,7 +1337,8 @@ MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers"); module_param(force_function_unhide, int, 0444); MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:" - " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); + " 1=force unhide and hope BIOS doesn't fight driver for " + "Dev0:Fun1 access"); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); @@ -1324,3 +1346,6 @@ MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); module_param(sysbus_parity, int, 0444); MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking," " 1=enable system bus parity checking, default=auto-detect"); +module_param(report_non_memory_errors, int, 0644); +MODULE_PARM_DESC(report_non_memory_errors, "0=disable non-memory error " + "reporting, 1=enable non-memory error reporting"); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 021d18795145..ad218fe4942d 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -44,6 +44,25 @@ int edac_mc_get_poll_msec(void) return edac_mc_poll_msec; } +static int edac_set_poll_msec(const char *val, struct kernel_param *kp) +{ + long l; + int ret; + + if (!val) + return -EINVAL; + + ret = strict_strtol(val, 0, &l); + if (ret == -EINVAL || ((int)l != l)) + return -EINVAL; + *((int *)kp->arg) = l; + + /* notify edac_mc engine to reset the poll period */ + edac_mc_reset_delay_period(l); + + return 0; +} + /* Parameter declarations for above */ module_param(edac_mc_panic_on_ue, int, 0644); MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); @@ -53,7 +72,8 @@ MODULE_PARM_DESC(edac_mc_log_ue, module_param(edac_mc_log_ce, int, 0644); MODULE_PARM_DESC(edac_mc_log_ce, "Log correctable error to console: 0=off 1=on"); -module_param(edac_mc_poll_msec, int, 0644); +module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int, + &edac_mc_poll_msec, 0644); MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds"); /* @@ -103,16 +123,6 @@ static const char *edac_caps[] = { -/* - * /sys/devices/system/edac/mc; - * data structures and methods - */ -static ssize_t memctrl_int_show(void *ptr, char *buffer) -{ - int *value = (int *)ptr; - return sprintf(buffer, "%u\n", *value); -} - static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) { int *value = (int *)ptr; @@ -123,23 +133,6 @@ static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) return count; } -/* - * mc poll_msec time value - */ -static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count) -{ - int *value = (int *)ptr; - - if (isdigit(*buffer)) { - *value = simple_strtoul(buffer, NULL, 0); - - /* notify edac_mc engine to reset the poll period */ - edac_mc_reset_delay_period(*value); - } - - return count; -} - /* EDAC sysfs CSROW data structures and methods */ @@ -185,7 +178,11 @@ static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data, static ssize_t channel_dimm_label_show(struct csrow_info *csrow, char *data, int channel) { - return snprintf(data, EDAC_MC_LABEL_LEN, "%s", + /* if field has not been initialized, there is nothing to send */ + if (!csrow->channels[channel].label[0]) + return 0; + + return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n", csrow->channels[channel].label); } @@ -649,98 +646,10 @@ static struct kobj_type ktype_mci = { .default_attrs = (struct attribute **)mci_attr, }; -/* show/store, tables, etc for the MC kset */ - - -struct memctrl_dev_attribute { - struct attribute attr; - void *value; - ssize_t(*show) (void *, char *); - ssize_t(*store) (void *, const char *, size_t); -}; - -/* Set of show/store abstract level functions for memory control object */ -static ssize_t memctrl_dev_show(struct kobject *kobj, - struct attribute *attr, char *buffer) -{ - struct memctrl_dev_attribute *memctrl_dev; - memctrl_dev = (struct memctrl_dev_attribute *)attr; - - if (memctrl_dev->show) - return memctrl_dev->show(memctrl_dev->value, buffer); - - return -EIO; -} - -static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct memctrl_dev_attribute *memctrl_dev; - memctrl_dev = (struct memctrl_dev_attribute *)attr; - - if (memctrl_dev->store) - return memctrl_dev->store(memctrl_dev->value, buffer, count); - - return -EIO; -} - -static struct sysfs_ops memctrlfs_ops = { - .show = memctrl_dev_show, - .store = memctrl_dev_store -}; - -#define MEMCTRL_ATTR(_name, _mode, _show, _store) \ -static struct memctrl_dev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = &_name, \ - .show = _show, \ - .store = _store, \ -}; - -#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store) \ -static struct memctrl_dev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = _data, \ - .show = _show, \ - .store = _store, \ -}; - -/* csrow<id> control files */ -MEMCTRL_ATTR(edac_mc_panic_on_ue, - S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); - -MEMCTRL_ATTR(edac_mc_log_ue, - S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); - -MEMCTRL_ATTR(edac_mc_log_ce, - S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); - -MEMCTRL_ATTR(edac_mc_poll_msec, - S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store); - -/* Base Attributes of the memory ECC object */ -static struct memctrl_dev_attribute *memctrl_attr[] = { - &attr_edac_mc_panic_on_ue, - &attr_edac_mc_log_ue, - &attr_edac_mc_log_ce, - &attr_edac_mc_poll_msec, - NULL, -}; - - -/* the ktype for the mc_kset internal kobj */ -static struct kobj_type ktype_mc_set_attribs = { - .sysfs_ops = &memctrlfs_ops, - .default_attrs = (struct attribute **)memctrl_attr, -}; - /* EDAC memory controller sysfs kset: * /sys/devices/system/edac/mc */ -static struct kset mc_kset = { - .kobj = {.ktype = &ktype_mc_set_attribs }, -}; - +static struct kset *mc_kset; /* * edac_mc_register_sysfs_main_kobj @@ -771,7 +680,7 @@ int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci) } /* this instance become part of the mc_kset */ - kobj_mci->kset = &mc_kset; + kobj_mci->kset = mc_kset; /* register the mc<id> kobject to the mc_kset */ err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL, @@ -1001,12 +910,9 @@ int edac_sysfs_setup_mc_kset(void) } /* Init the MC's kobject */ - kobject_set_name(&mc_kset.kobj, "mc"); - mc_kset.kobj.parent = &edac_class->kset.kobj; - - /* register the mc_kset */ - err = kset_register(&mc_kset); - if (err) { + mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj); + if (!mc_kset) { + err = -ENOMEM; debugf1("%s() Failed to register '.../edac/mc'\n", __func__); goto fail_out; } @@ -1028,6 +934,6 @@ fail_out: */ void edac_sysfs_teardown_mc_kset(void) { - kset_unregister(&mc_kset); + kset_unregister(mc_kset); } diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 2c1fa1bb6df2..5c153dccc95e 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -28,7 +28,7 @@ static int edac_pci_poll_msec = 1000; /* one second workq period */ static atomic_t pci_parity_count = ATOMIC_INIT(0); static atomic_t pci_nonparity_count = ATOMIC_INIT(0); -static struct kobject edac_pci_top_main_kobj; +static struct kobject *edac_pci_top_main_kobj; static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0); /* getter functions for the data variables */ @@ -83,7 +83,7 @@ static void edac_pci_instance_release(struct kobject *kobj) pci = to_instance(kobj); /* decrement reference count on top main kobj */ - kobject_put(&edac_pci_top_main_kobj); + kobject_put(edac_pci_top_main_kobj); kfree(pci); /* Free the control struct */ } @@ -166,7 +166,7 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) * track the number of PCI instances we have, and thus nest * properly on keeping the module loaded */ - main_kobj = kobject_get(&edac_pci_top_main_kobj); + main_kobj = kobject_get(edac_pci_top_main_kobj); if (!main_kobj) { err = -ENODEV; goto error_out; @@ -174,11 +174,11 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) /* And now register this new kobject under the main kobj */ err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance, - &edac_pci_top_main_kobj, "pci%d", idx); + edac_pci_top_main_kobj, "pci%d", idx); if (err != 0) { debugf2("%s() failed to register instance pci%d\n", __func__, idx); - kobject_put(&edac_pci_top_main_kobj); + kobject_put(edac_pci_top_main_kobj); goto error_out; } @@ -316,9 +316,10 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = { */ static void edac_pci_release_main_kobj(struct kobject *kobj) { - debugf0("%s() here to module_put(THIS_MODULE)\n", __func__); + kfree(kobj); + /* last reference to top EDAC PCI kobject has been removed, * NOW release our ref count on the core module */ @@ -369,8 +370,16 @@ static int edac_pci_main_kobj_setup(void) goto decrement_count_fail; } + edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!edac_pci_top_main_kobj) { + debugf1("Failed to allocate\n"); + err = -ENOMEM; + goto kzalloc_fail; + } + /* Instanstiate the pci object */ - err = kobject_init_and_add(&edac_pci_top_main_kobj, &ktype_edac_pci_main_kobj, + err = kobject_init_and_add(edac_pci_top_main_kobj, + &ktype_edac_pci_main_kobj, &edac_class->kset.kobj, "pci"); if (err) { debugf1("Failed to register '.../edac/pci'\n"); @@ -381,13 +390,16 @@ static int edac_pci_main_kobj_setup(void) * for EDAC PCI, then edac_pci_main_kobj_teardown() * must be used, for resources to be cleaned up properly */ - kobject_uevent(&edac_pci_top_main_kobj, KOBJ_ADD); + kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD); debugf1("Registered '.../edac/pci' kobject\n"); return 0; /* Error unwind statck */ kobject_init_and_add_fail: + kfree(edac_pci_top_main_kobj); + +kzalloc_fail: module_put(THIS_MODULE); decrement_count_fail: @@ -414,7 +426,7 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { debugf0("%s() called kobject_put on main kobj\n", __func__); - kobject_put(&edac_pci_top_main_kobj); + kobject_put(edac_pci_top_main_kobj); } } diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c new file mode 100644 index 000000000000..22db05a67bfb --- /dev/null +++ b/drivers/edac/i5100_edac.c @@ -0,0 +1,981 @@ +/* + * Intel 5100 Memory Controllers kernel module + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * This module is based on the following document: + * + * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet + * http://download.intel.com/design/chipsets/datashts/318378.pdf + * + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <linux/delay.h> +#include <linux/mmzone.h> + +#include "edac_core.h" + +/* register addresses */ + +/* device 16, func 1 */ +#define I5100_MC 0x40 /* Memory Control Register */ +#define I5100_MS 0x44 /* Memory Status Register */ +#define I5100_SPDDATA 0x48 /* Serial Presence Detect Status Reg */ +#define I5100_SPDCMD 0x4c /* Serial Presence Detect Command Reg */ +#define I5100_TOLM 0x6c /* Top of Low Memory */ +#define I5100_MIR0 0x80 /* Memory Interleave Range 0 */ +#define I5100_MIR1 0x84 /* Memory Interleave Range 1 */ +#define I5100_AMIR_0 0x8c /* Adjusted Memory Interleave Range 0 */ +#define I5100_AMIR_1 0x90 /* Adjusted Memory Interleave Range 1 */ +#define I5100_FERR_NF_MEM 0xa0 /* MC First Non Fatal Errors */ +#define I5100_FERR_NF_MEM_M16ERR_MASK (1 << 16) +#define I5100_FERR_NF_MEM_M15ERR_MASK (1 << 15) +#define I5100_FERR_NF_MEM_M14ERR_MASK (1 << 14) +#define I5100_FERR_NF_MEM_M12ERR_MASK (1 << 12) +#define I5100_FERR_NF_MEM_M11ERR_MASK (1 << 11) +#define I5100_FERR_NF_MEM_M10ERR_MASK (1 << 10) +#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6) +#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5) +#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4) +#define I5100_FERR_NF_MEM_M1ERR_MASK 1 +#define I5100_FERR_NF_MEM_ANY_MASK \ + (I5100_FERR_NF_MEM_M16ERR_MASK | \ + I5100_FERR_NF_MEM_M15ERR_MASK | \ + I5100_FERR_NF_MEM_M14ERR_MASK | \ + I5100_FERR_NF_MEM_M12ERR_MASK | \ + I5100_FERR_NF_MEM_M11ERR_MASK | \ + I5100_FERR_NF_MEM_M10ERR_MASK | \ + I5100_FERR_NF_MEM_M6ERR_MASK | \ + I5100_FERR_NF_MEM_M5ERR_MASK | \ + I5100_FERR_NF_MEM_M4ERR_MASK | \ + I5100_FERR_NF_MEM_M1ERR_MASK) +#define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ +#define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ + +/* device 21 and 22, func 0 */ +#define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ +#define I5100_DMIR 0x15c /* DIMM Interleave Range */ +#define I5100_VALIDLOG 0x18c /* Valid Log Markers */ +#define I5100_NRECMEMA 0x190 /* Non-Recoverable Memory Error Log Reg A */ +#define I5100_NRECMEMB 0x194 /* Non-Recoverable Memory Error Log Reg B */ +#define I5100_REDMEMA 0x198 /* Recoverable Memory Data Error Log Reg A */ +#define I5100_REDMEMB 0x19c /* Recoverable Memory Data Error Log Reg B */ +#define I5100_RECMEMA 0x1a0 /* Recoverable Memory Error Log Reg A */ +#define I5100_RECMEMB 0x1a4 /* Recoverable Memory Error Log Reg B */ +#define I5100_MTR_4 0x1b0 /* Memory Technology Registers 4,5 */ + +/* bit field accessors */ + +static inline u32 i5100_mc_errdeten(u32 mc) +{ + return mc >> 5 & 1; +} + +static inline u16 i5100_spddata_rdo(u16 a) +{ + return a >> 15 & 1; +} + +static inline u16 i5100_spddata_sbe(u16 a) +{ + return a >> 13 & 1; +} + +static inline u16 i5100_spddata_busy(u16 a) +{ + return a >> 12 & 1; +} + +static inline u16 i5100_spddata_data(u16 a) +{ + return a & ((1 << 8) - 1); +} + +static inline u32 i5100_spdcmd_create(u32 dti, u32 ckovrd, u32 sa, u32 ba, + u32 data, u32 cmd) +{ + return ((dti & ((1 << 4) - 1)) << 28) | + ((ckovrd & 1) << 27) | + ((sa & ((1 << 3) - 1)) << 24) | + ((ba & ((1 << 8) - 1)) << 16) | + ((data & ((1 << 8) - 1)) << 8) | + (cmd & 1); +} + +static inline u16 i5100_tolm_tolm(u16 a) +{ + return a >> 12 & ((1 << 4) - 1); +} + +static inline u16 i5100_mir_limit(u16 a) +{ + return a >> 4 & ((1 << 12) - 1); +} + +static inline u16 i5100_mir_way1(u16 a) +{ + return a >> 1 & 1; +} + +static inline u16 i5100_mir_way0(u16 a) +{ + return a & 1; +} + +static inline u32 i5100_ferr_nf_mem_chan_indx(u32 a) +{ + return a >> 28 & 1; +} + +static inline u32 i5100_ferr_nf_mem_any(u32 a) +{ + return a & I5100_FERR_NF_MEM_ANY_MASK; +} + +static inline u32 i5100_nerr_nf_mem_any(u32 a) +{ + return i5100_ferr_nf_mem_any(a); +} + +static inline u32 i5100_dmir_limit(u32 a) +{ + return a >> 16 & ((1 << 11) - 1); +} + +static inline u32 i5100_dmir_rank(u32 a, u32 i) +{ + return a >> (4 * i) & ((1 << 2) - 1); +} + +static inline u16 i5100_mtr_present(u16 a) +{ + return a >> 10 & 1; +} + +static inline u16 i5100_mtr_ethrottle(u16 a) +{ + return a >> 9 & 1; +} + +static inline u16 i5100_mtr_width(u16 a) +{ + return a >> 8 & 1; +} + +static inline u16 i5100_mtr_numbank(u16 a) +{ + return a >> 6 & 1; +} + +static inline u16 i5100_mtr_numrow(u16 a) +{ + return a >> 2 & ((1 << 2) - 1); +} + +static inline u16 i5100_mtr_numcol(u16 a) +{ + return a & ((1 << 2) - 1); +} + + +static inline u32 i5100_validlog_redmemvalid(u32 a) +{ + return a >> 2 & 1; +} + +static inline u32 i5100_validlog_recmemvalid(u32 a) +{ + return a >> 1 & 1; +} + +static inline u32 i5100_validlog_nrecmemvalid(u32 a) +{ + return a & 1; +} + +static inline u32 i5100_nrecmema_merr(u32 a) +{ + return a >> 15 & ((1 << 5) - 1); +} + +static inline u32 i5100_nrecmema_bank(u32 a) +{ + return a >> 12 & ((1 << 3) - 1); +} + +static inline u32 i5100_nrecmema_rank(u32 a) +{ + return a >> 8 & ((1 << 3) - 1); +} + +static inline u32 i5100_nrecmema_dm_buf_id(u32 a) +{ + return a & ((1 << 8) - 1); +} + +static inline u32 i5100_nrecmemb_cas(u32 a) +{ + return a >> 16 & ((1 << 13) - 1); +} + +static inline u32 i5100_nrecmemb_ras(u32 a) +{ + return a & ((1 << 16) - 1); +} + +static inline u32 i5100_redmemb_ecc_locator(u32 a) +{ + return a & ((1 << 18) - 1); +} + +static inline u32 i5100_recmema_merr(u32 a) +{ + return i5100_nrecmema_merr(a); +} + +static inline u32 i5100_recmema_bank(u32 a) +{ + return i5100_nrecmema_bank(a); +} + +static inline u32 i5100_recmema_rank(u32 a) +{ + return i5100_nrecmema_rank(a); +} + +static inline u32 i5100_recmema_dm_buf_id(u32 a) +{ + return i5100_nrecmema_dm_buf_id(a); +} + +static inline u32 i5100_recmemb_cas(u32 a) +{ + return i5100_nrecmemb_cas(a); +} + +static inline u32 i5100_recmemb_ras(u32 a) +{ + return i5100_nrecmemb_ras(a); +} + +/* some generic limits */ +#define I5100_MAX_RANKS_PER_CTLR 6 +#define I5100_MAX_CTLRS 2 +#define I5100_MAX_RANKS_PER_DIMM 4 +#define I5100_DIMM_ADDR_LINES (6 - 3) /* 64 bits / 8 bits per byte */ +#define I5100_MAX_DIMM_SLOTS_PER_CTLR 4 +#define I5100_MAX_RANK_INTERLEAVE 4 +#define I5100_MAX_DMIRS 5 + +struct i5100_priv { + /* ranks on each dimm -- 0 maps to not present -- obtained via SPD */ + int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR]; + + /* + * mainboard chip select map -- maps i5100 chip selects to + * DIMM slot chip selects. In the case of only 4 ranks per + * controller, the mapping is fairly obvious but not unique. + * we map -1 -> NC and assume both controllers use the same + * map... + * + */ + int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM]; + + /* memory interleave range */ + struct { + u64 limit; + unsigned way[2]; + } mir[I5100_MAX_CTLRS]; + + /* adjusted memory interleave range register */ + unsigned amir[I5100_MAX_CTLRS]; + + /* dimm interleave range */ + struct { + unsigned rank[I5100_MAX_RANK_INTERLEAVE]; + u64 limit; + } dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS]; + + /* memory technology registers... */ + struct { + unsigned present; /* 0 or 1 */ + unsigned ethrottle; /* 0 or 1 */ + unsigned width; /* 4 or 8 bits */ + unsigned numbank; /* 2 or 3 lines */ + unsigned numrow; /* 13 .. 16 lines */ + unsigned numcol; /* 11 .. 12 lines */ + } mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR]; + + u64 tolm; /* top of low memory in bytes */ + unsigned ranksperctlr; /* number of ranks per controller */ + + struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *ch0mm; /* device 21 func 0 */ + struct pci_dev *ch1mm; /* device 22 func 0 */ +}; + +/* map a rank/ctlr to a slot number on the mainboard */ +static int i5100_rank_to_slot(const struct mem_ctl_info *mci, + int ctlr, int rank) +{ + const struct i5100_priv *priv = mci->pvt_info; + int i; + + for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) { + int j; + const int numrank = priv->dimm_numrank[ctlr][i]; + + for (j = 0; j < numrank; j++) + if (priv->dimm_csmap[i][j] == rank) + return i * 2 + ctlr; + } + + return -1; +} + +static const char *i5100_err_msg(unsigned err) +{ + static const char *merrs[] = { + "unknown", /* 0 */ + "uncorrectable data ECC on replay", /* 1 */ + "unknown", /* 2 */ + "unknown", /* 3 */ + "aliased uncorrectable demand data ECC", /* 4 */ + "aliased uncorrectable spare-copy data ECC", /* 5 */ + "aliased uncorrectable patrol data ECC", /* 6 */ + "unknown", /* 7 */ + "unknown", /* 8 */ + "unknown", /* 9 */ + "non-aliased uncorrectable demand data ECC", /* 10 */ + "non-aliased uncorrectable spare-copy data ECC", /* 11 */ + "non-aliased uncorrectable patrol data ECC", /* 12 */ + "unknown", /* 13 */ + "correctable demand data ECC", /* 14 */ + "correctable spare-copy data ECC", /* 15 */ + "correctable patrol data ECC", /* 16 */ + "unknown", /* 17 */ + "SPD protocol error", /* 18 */ + "unknown", /* 19 */ + "spare copy initiated", /* 20 */ + "spare copy completed", /* 21 */ + }; + unsigned i; + + for (i = 0; i < ARRAY_SIZE(merrs); i++) + if (1 << i & err) + return merrs[i]; + + return "none"; +} + +/* convert csrow index into a rank (per controller -- 0..5) */ +static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) +{ + const struct i5100_priv *priv = mci->pvt_info; + + return csrow % priv->ranksperctlr; +} + +/* convert csrow index into a controller (0..1) */ +static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow) +{ + const struct i5100_priv *priv = mci->pvt_info; + + return csrow / priv->ranksperctlr; +} + +static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci, + int ctlr, int rank) +{ + const struct i5100_priv *priv = mci->pvt_info; + + return ctlr * priv->ranksperctlr + rank; +} + +static void i5100_handle_ce(struct mem_ctl_info *mci, + int ctlr, + unsigned bank, + unsigned rank, + unsigned long syndrome, + unsigned cas, + unsigned ras, + const char *msg) +{ + const int csrow = i5100_rank_to_csrow(mci, ctlr, rank); + + printk(KERN_ERR + "CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, " + "cas %u, ras %u, csrow %u, label \"%s\": %s\n", + ctlr, bank, rank, syndrome, cas, ras, + csrow, mci->csrows[csrow].channels[0].label, msg); + + mci->ce_count++; + mci->csrows[csrow].ce_count++; + mci->csrows[csrow].channels[0].ce_count++; +} + +static void i5100_handle_ue(struct mem_ctl_info *mci, + int ctlr, + unsigned bank, + unsigned rank, + unsigned long syndrome, + unsigned cas, + unsigned ras, + const char *msg) +{ + const int csrow = i5100_rank_to_csrow(mci, ctlr, rank); + + printk(KERN_ERR + "UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, " + "cas %u, ras %u, csrow %u, label \"%s\": %s\n", + ctlr, bank, rank, syndrome, cas, ras, + csrow, mci->csrows[csrow].channels[0].label, msg); + + mci->ue_count++; + mci->csrows[csrow].ue_count++; +} + +static void i5100_read_log(struct mem_ctl_info *mci, int ctlr, + u32 ferr, u32 nerr) +{ + struct i5100_priv *priv = mci->pvt_info; + struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm; + u32 dw; + u32 dw2; + unsigned syndrome = 0; + unsigned ecc_loc = 0; + unsigned merr; + unsigned bank; + unsigned rank; + unsigned cas; + unsigned ras; + + pci_read_config_dword(pdev, I5100_VALIDLOG, &dw); + + if (i5100_validlog_redmemvalid(dw)) { + pci_read_config_dword(pdev, I5100_REDMEMA, &dw2); + syndrome = dw2; + pci_read_config_dword(pdev, I5100_REDMEMB, &dw2); + ecc_loc = i5100_redmemb_ecc_locator(dw2); + } + + if (i5100_validlog_recmemvalid(dw)) { + const char *msg; + + pci_read_config_dword(pdev, I5100_RECMEMA, &dw2); + merr = i5100_recmema_merr(dw2); + bank = i5100_recmema_bank(dw2); + rank = i5100_recmema_rank(dw2); + + pci_read_config_dword(pdev, I5100_RECMEMB, &dw2); + cas = i5100_recmemb_cas(dw2); + ras = i5100_recmemb_ras(dw2); + + /* FIXME: not really sure if this is what merr is... + */ + if (!merr) + msg = i5100_err_msg(ferr); + else + msg = i5100_err_msg(nerr); + + i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg); + } + + if (i5100_validlog_nrecmemvalid(dw)) { + const char *msg; + + pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2); + merr = i5100_nrecmema_merr(dw2); + bank = i5100_nrecmema_bank(dw2); + rank = i5100_nrecmema_rank(dw2); + + pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2); + cas = i5100_nrecmemb_cas(dw2); + ras = i5100_nrecmemb_ras(dw2); + + /* FIXME: not really sure if this is what merr is... + */ + if (!merr) + msg = i5100_err_msg(ferr); + else + msg = i5100_err_msg(nerr); + + i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg); + } + + pci_write_config_dword(pdev, I5100_VALIDLOG, dw); +} + +static void i5100_check_error(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 dw; + + + pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw); + if (i5100_ferr_nf_mem_any(dw)) { + u32 dw2; + + pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2); + if (dw2) + pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, + dw2); + pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw); + + i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw), + i5100_ferr_nf_mem_any(dw), + i5100_nerr_nf_mem_any(dw2)); + } +} + +static struct pci_dev *pci_get_device_func(unsigned vendor, + unsigned device, + unsigned func) +{ + struct pci_dev *ret = NULL; + + while (1) { + ret = pci_get_device(vendor, device, ret); + + if (!ret) + break; + + if (PCI_FUNC(ret->devfn) == func) + break; + } + + return ret; +} + +static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci, + int csrow) +{ + struct i5100_priv *priv = mci->pvt_info; + const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow); + const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow); + unsigned addr_lines; + + /* dimm present? */ + if (!priv->mtr[ctlr][ctlr_rank].present) + return 0ULL; + + addr_lines = + I5100_DIMM_ADDR_LINES + + priv->mtr[ctlr][ctlr_rank].numcol + + priv->mtr[ctlr][ctlr_rank].numrow + + priv->mtr[ctlr][ctlr_rank].numbank; + + return (unsigned long) + ((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE); +} + +static void __devinit i5100_init_mtr(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm }; + int i; + + for (i = 0; i < I5100_MAX_CTLRS; i++) { + int j; + struct pci_dev *pdev = mms[i]; + + for (j = 0; j < I5100_MAX_RANKS_PER_CTLR; j++) { + const unsigned addr = + (j < 4) ? I5100_MTR_0 + j * 2 : + I5100_MTR_4 + (j - 4) * 2; + u16 w; + + pci_read_config_word(pdev, addr, &w); + + priv->mtr[i][j].present = i5100_mtr_present(w); + priv->mtr[i][j].ethrottle = i5100_mtr_ethrottle(w); + priv->mtr[i][j].width = 4 + 4 * i5100_mtr_width(w); + priv->mtr[i][j].numbank = 2 + i5100_mtr_numbank(w); + priv->mtr[i][j].numrow = 13 + i5100_mtr_numrow(w); + priv->mtr[i][j].numcol = 10 + i5100_mtr_numcol(w); + } + } +} + +/* + * FIXME: make this into a real i2c adapter (so that dimm-decode + * will work)? + */ +static int i5100_read_spd_byte(const struct mem_ctl_info *mci, + u8 ch, u8 slot, u8 addr, u8 *byte) +{ + struct i5100_priv *priv = mci->pvt_info; + u16 w; + unsigned long et; + + pci_read_config_word(priv->mc, I5100_SPDDATA, &w); + if (i5100_spddata_busy(w)) + return -1; + + pci_write_config_dword(priv->mc, I5100_SPDCMD, + i5100_spdcmd_create(0xa, 1, ch * 4 + slot, addr, + 0, 0)); + + /* wait up to 100ms */ + et = jiffies + HZ / 10; + udelay(100); + while (1) { + pci_read_config_word(priv->mc, I5100_SPDDATA, &w); + if (!i5100_spddata_busy(w)) + break; + udelay(100); + } + + if (!i5100_spddata_rdo(w) || i5100_spddata_sbe(w)) + return -1; + + *byte = i5100_spddata_data(w); + + return 0; +} + +/* + * fill dimm chip select map + * + * FIXME: + * o only valid for 4 ranks per controller + * o not the only way to may chip selects to dimm slots + * o investigate if there is some way to obtain this map from the bios + */ +static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + int i; + + WARN_ON(priv->ranksperctlr != 4); + + for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) { + int j; + + for (j = 0; j < I5100_MAX_RANKS_PER_DIMM; j++) + priv->dimm_csmap[i][j] = -1; /* default NC */ + } + + /* only 2 chip selects per slot... */ + priv->dimm_csmap[0][0] = 0; + priv->dimm_csmap[0][1] = 3; + priv->dimm_csmap[1][0] = 1; + priv->dimm_csmap[1][1] = 2; + priv->dimm_csmap[2][0] = 2; + priv->dimm_csmap[3][0] = 3; +} + +static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev, + struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + int i; + + for (i = 0; i < I5100_MAX_CTLRS; i++) { + int j; + + for (j = 0; j < I5100_MAX_DIMM_SLOTS_PER_CTLR; j++) { + u8 rank; + + if (i5100_read_spd_byte(mci, i, j, 5, &rank) < 0) + priv->dimm_numrank[i][j] = 0; + else + priv->dimm_numrank[i][j] = (rank & 3) + 1; + } + } + + i5100_init_dimm_csmap(mci); +} + +static void __devinit i5100_init_interleaving(struct pci_dev *pdev, + struct mem_ctl_info *mci) +{ + u16 w; + u32 dw; + struct i5100_priv *priv = mci->pvt_info; + struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm }; + int i; + + pci_read_config_word(pdev, I5100_TOLM, &w); + priv->tolm = (u64) i5100_tolm_tolm(w) * 256 * 1024 * 1024; + + pci_read_config_word(pdev, I5100_MIR0, &w); + priv->mir[0].limit = (u64) i5100_mir_limit(w) << 28; + priv->mir[0].way[1] = i5100_mir_way1(w); + priv->mir[0].way[0] = i5100_mir_way0(w); + + pci_read_config_word(pdev, I5100_MIR1, &w); + priv->mir[1].limit = (u64) i5100_mir_limit(w) << 28; + priv->mir[1].way[1] = i5100_mir_way1(w); + priv->mir[1].way[0] = i5100_mir_way0(w); + + pci_read_config_word(pdev, I5100_AMIR_0, &w); + priv->amir[0] = w; + pci_read_config_word(pdev, I5100_AMIR_1, &w); + priv->amir[1] = w; + + for (i = 0; i < I5100_MAX_CTLRS; i++) { + int j; + + for (j = 0; j < 5; j++) { + int k; + + pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw); + + priv->dmir[i][j].limit = + (u64) i5100_dmir_limit(dw) << 28; + for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++) + priv->dmir[i][j].rank[k] = + i5100_dmir_rank(dw, k); + } + } + + i5100_init_mtr(mci); +} + +static void __devinit i5100_init_csrows(struct mem_ctl_info *mci) +{ + int i; + unsigned long total_pages = 0UL; + struct i5100_priv *priv = mci->pvt_info; + + for (i = 0; i < mci->nr_csrows; i++) { + const unsigned long npages = i5100_npages(mci, i); + const unsigned cntlr = i5100_csrow_to_cntlr(mci, i); + const unsigned rank = i5100_csrow_to_rank(mci, i); + + if (!npages) + continue; + + /* + * FIXME: these two are totally bogus -- I don't see how to + * map them correctly to this structure... + */ + mci->csrows[i].first_page = total_pages; + mci->csrows[i].last_page = total_pages + npages - 1; + mci->csrows[i].page_mask = 0UL; + + mci->csrows[i].nr_pages = npages; + mci->csrows[i].grain = 32; + mci->csrows[i].csrow_idx = i; + mci->csrows[i].dtype = + (priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8; + mci->csrows[i].ue_count = 0; + mci->csrows[i].ce_count = 0; + mci->csrows[i].mtype = MEM_RDDR2; + mci->csrows[i].edac_mode = EDAC_SECDED; + mci->csrows[i].mci = mci; + mci->csrows[i].nr_channels = 1; + mci->csrows[i].channels[0].chan_idx = 0; + mci->csrows[i].channels[0].ce_count = 0; + mci->csrows[i].channels[0].csrow = mci->csrows + i; + snprintf(mci->csrows[i].channels[0].label, + sizeof(mci->csrows[i].channels[0].label), + "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank)); + + total_pages += npages; + } +} + +static int __devinit i5100_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + struct mem_ctl_info *mci; + struct i5100_priv *priv; + struct pci_dev *ch0mm, *ch1mm; + int ret = 0; + u32 dw; + int ranksperch; + + if (PCI_FUNC(pdev->devfn) != 1) + return -ENODEV; + + rc = pci_enable_device(pdev); + if (rc < 0) { + ret = rc; + goto bail; + } + + /* ECC enabled? */ + pci_read_config_dword(pdev, I5100_MC, &dw); + if (!i5100_mc_errdeten(dw)) { + printk(KERN_INFO "i5100_edac: ECC not enabled.\n"); + ret = -ENODEV; + goto bail_pdev; + } + + /* figure out how many ranks, from strapped state of 48GB_Mode input */ + pci_read_config_dword(pdev, I5100_MS, &dw); + ranksperch = !!(dw & (1 << 8)) * 2 + 4; + + if (ranksperch != 4) { + /* FIXME: get 6 ranks / controller to work - need hw... */ + printk(KERN_INFO "i5100_edac: unsupported configuration.\n"); + ret = -ENODEV; + goto bail_pdev; + } + + /* enable error reporting... */ + pci_read_config_dword(pdev, I5100_EMASK_MEM, &dw); + dw &= ~I5100_FERR_NF_MEM_ANY_MASK; + pci_write_config_dword(pdev, I5100_EMASK_MEM, dw); + + /* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */ + ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_21, 0); + if (!ch0mm) { + ret = -ENODEV; + goto bail_pdev; + } + + rc = pci_enable_device(ch0mm); + if (rc < 0) { + ret = rc; + goto bail_ch0; + } + + /* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */ + ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_22, 0); + if (!ch1mm) { + ret = -ENODEV; + goto bail_disable_ch0; + } + + rc = pci_enable_device(ch1mm); + if (rc < 0) { + ret = rc; + goto bail_ch1; + } + + mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0); + if (!mci) { + ret = -ENOMEM; + goto bail_disable_ch1; + } + + mci->dev = &pdev->dev; + + priv = mci->pvt_info; + priv->ranksperctlr = ranksperch; + priv->mc = pdev; + priv->ch0mm = ch0mm; + priv->ch1mm = ch1mm; + + i5100_init_dimm_layout(pdev, mci); + i5100_init_interleaving(pdev, mci); + + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = "i5100_edac.c"; + mci->mod_ver = "not versioned"; + mci->ctl_name = "i5100"; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + + mci->edac_check = i5100_check_error; + + i5100_init_csrows(mci); + + /* this strange construction seems to be in every driver, dunno why */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + + if (edac_mc_add_mc(mci)) { + ret = -ENODEV; + goto bail_mc; + } + + return ret; + +bail_mc: + edac_mc_free(mci); + +bail_disable_ch1: + pci_disable_device(ch1mm); + +bail_ch1: + pci_dev_put(ch1mm); + +bail_disable_ch0: + pci_disable_device(ch0mm); + +bail_ch0: + pci_dev_put(ch0mm); + +bail_pdev: + pci_disable_device(pdev); + +bail: + return ret; +} + +static void __devexit i5100_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + struct i5100_priv *priv; + + mci = edac_mc_del_mc(&pdev->dev); + + if (!mci) + return; + + priv = mci->pvt_info; + pci_disable_device(pdev); + pci_disable_device(priv->ch0mm); + pci_disable_device(priv->ch1mm); + pci_dev_put(priv->ch0mm); + pci_dev_put(priv->ch1mm); + + edac_mc_free(mci); +} + +static const struct pci_device_id i5100_pci_tbl[] __devinitdata = { + /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, i5100_pci_tbl); + +static struct pci_driver i5100_driver = { + .name = KBUILD_BASENAME, + .probe = i5100_init_one, + .remove = __devexit_p(i5100_remove_one), + .id_table = i5100_pci_tbl, +}; + +static int __init i5100_init(void) +{ + int pci_rc; + + pci_rc = pci_register_driver(&i5100_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +static void __exit i5100_exit(void) +{ + pci_unregister_driver(&i5100_driver); +} + +module_init(i5100_init); +module_exit(i5100_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR + ("Arthur Jones <ajones@riverbed.com>"); +MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers"); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index d49361bfe670..2265d9ca1535 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -195,14 +195,15 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev) +static int __devinit mpc85xx_pci_err_probe(struct of_device *op, + const struct of_device_id *match) { struct edac_pci_ctl_info *pci; struct mpc85xx_pci_pdata *pdata; - struct resource *r; + struct resource r; int res = 0; - if (!devres_open_group(&pdev->dev, mpc85xx_pci_err_probe, GFP_KERNEL)) + if (!devres_open_group(&op->dev, mpc85xx_pci_err_probe, GFP_KERNEL)) return -ENOMEM; pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mpc85xx_pci_err"); @@ -212,34 +213,37 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev) pdata = pci->pvt_info; pdata->name = "mpc85xx_pci_err"; pdata->irq = NO_IRQ; - platform_set_drvdata(pdev, pci); - pci->dev = &pdev->dev; + dev_set_drvdata(&op->dev, pci); + pci->dev = &op->dev; pci->mod_name = EDAC_MOD_STR; pci->ctl_name = pdata->name; - pci->dev_name = pdev->dev.bus_id; + pci->dev_name = op->dev.bus_id; if (edac_op_state == EDAC_OPSTATE_POLL) pci->edac_check = mpc85xx_pci_check; pdata->edac_idx = edac_pci_idx++; - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { + res = of_address_to_resource(op->node, 0, &r); + if (res) { printk(KERN_ERR "%s: Unable to get resource for " "PCI err regs\n", __func__); goto err; } - if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, pdata->name)) { + /* we only need the error registers */ + r.start += 0xe00; + + if (!devm_request_mem_region(&op->dev, r.start, + r.end - r.start + 1, pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); res = -EBUSY; goto err; } - pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + pdata->pci_vbase = devm_ioremap(&op->dev, r.start, + r.end - r.start + 1); if (!pdata->pci_vbase) { printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); res = -ENOMEM; @@ -266,14 +270,15 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev) } if (edac_op_state == EDAC_OPSTATE_INT) { - pdata->irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, pdata->irq, + pdata->irq = irq_of_parse_and_map(op->node, 0); + res = devm_request_irq(&op->dev, pdata->irq, mpc85xx_pci_isr, IRQF_DISABLED, "[EDAC] PCI err", pci); if (res < 0) { printk(KERN_ERR "%s: Unable to requiest irq %d for " "MPC85xx PCI err\n", __func__, pdata->irq); + irq_dispose_mapping(pdata->irq); res = -ENODEV; goto err2; } @@ -282,23 +287,23 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev) pdata->irq); } - devres_remove_group(&pdev->dev, mpc85xx_pci_err_probe); + devres_remove_group(&op->dev, mpc85xx_pci_err_probe); debugf3("%s(): success\n", __func__); printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n"); return 0; err2: - edac_pci_del_device(&pdev->dev); + edac_pci_del_device(&op->dev); err: edac_pci_free_ctl_info(pci); - devres_release_group(&pdev->dev, mpc85xx_pci_err_probe); + devres_release_group(&op->dev, mpc85xx_pci_err_probe); return res; } -static int mpc85xx_pci_err_remove(struct platform_device *pdev) +static int mpc85xx_pci_err_remove(struct of_device *op) { - struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); + struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev); struct mpc85xx_pci_pdata *pdata = pci->pvt_info; debugf0("%s()\n", __func__); @@ -318,12 +323,26 @@ static int mpc85xx_pci_err_remove(struct platform_device *pdev) return 0; } -static struct platform_driver mpc85xx_pci_err_driver = { +static struct of_device_id mpc85xx_pci_err_of_match[] = { + { + .compatible = "fsl,mpc8540-pcix", + }, + { + .compatible = "fsl,mpc8540-pci", + }, + {}, +}; + +static struct of_platform_driver mpc85xx_pci_err_driver = { + .owner = THIS_MODULE, + .name = "mpc85xx_pci_err", + .match_table = mpc85xx_pci_err_of_match, .probe = mpc85xx_pci_err_probe, .remove = __devexit_p(mpc85xx_pci_err_remove), .driver = { - .name = "mpc85xx_pci_err", - } + .name = "mpc85xx_pci_err", + .owner = THIS_MODULE, + }, }; #endif /* CONFIG_PCI */ @@ -1002,7 +1021,7 @@ static int __init mpc85xx_mc_init(void) printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n"); #ifdef CONFIG_PCI - res = platform_driver_register(&mpc85xx_pci_err_driver); + res = of_register_platform_driver(&mpc85xx_pci_err_driver); if (res) printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); #endif @@ -1025,7 +1044,7 @@ static void __exit mpc85xx_mc_exit(void) { mtspr(SPRN_HID1, orig_hid1); #ifdef CONFIG_PCI - platform_driver_unregister(&mpc85xx_pci_err_driver); + of_unregister_platform_driver(&mpc85xx_pci_err_driver); #endif of_unregister_platform_driver(&mpc85xx_l2_err_driver); of_unregister_platform_driver(&mpc85xx_mc_err_driver); diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index bf071f140a05..083ce8d0c63d 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -71,6 +71,35 @@ static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of + * errata FEr-#11 and FEr-##16 for the 64460, it should be 0 on that chip as + * well. IOW, don't set bit 0. + */ + +/* Erratum FEr PCI-#16: clear bit 0 of PCI SERRn Mask reg. */ +static int __init mv64x60_pci_fixup(struct platform_device *pdev) +{ + struct resource *r; + void __iomem *pci_serr; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!r) { + printk(KERN_ERR "%s: Unable to get resource for " + "PCI err regs\n", __func__); + return -ENOENT; + } + + pci_serr = ioremap(r->start, r->end - r->start + 1); + if (!pci_serr) + return -ENOMEM; + + out_le32(pci_serr, in_le32(pci_serr) & ~0x1); + iounmap(pci_serr); + + return 0; +} + static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) { struct edac_pci_ctl_info *pci; @@ -128,6 +157,12 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) goto err; } + res = mv64x60_pci_fixup(pdev); + if (res < 0) { + printk(KERN_ERR "%s: PCI fixup failed\n", __func__); + goto err; + } + out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0); out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0); out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, @@ -612,7 +647,7 @@ static void get_total_mem(struct mv64x60_mc_pdata *pdata) if (!np) return; - reg = get_property(np, "reg", NULL); + reg = of_get_property(np, "reg", NULL); pdata->total_mem = reg[1]; } diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 0b624e927a6f..c66817e7717b 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -152,20 +152,11 @@ static ssize_t smi_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { - size_t max_read; ssize_t ret; mutex_lock(&smi_data_lock); - - if (pos >= smi_data_buf_size) { - ret = 0; - goto out; - } - - max_read = smi_data_buf_size - pos; - ret = min(max_read, count); - memcpy(buf, smi_data_buf + pos, ret); -out: + ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf, + smi_data_buf_size); mutex_unlock(&smi_data_lock); return ret; } diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index 7430e218cda6..13946ebd77d6 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -507,11 +507,6 @@ static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count) static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count) { - unsigned char *ptemp = NULL; - size_t bytes_left = 0; - size_t data_length = 0; - ssize_t ret_count = 0; - /* check to see if we have something to return */ if ((rbu_data.image_update_buffer == NULL) || (rbu_data.bios_image_size == 0)) { @@ -519,28 +514,11 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count) "bios_image_size %lu\n", rbu_data.image_update_buffer, rbu_data.bios_image_size); - ret_count = -ENOMEM; - goto read_rbu_data_exit; - } - - if (pos > rbu_data.bios_image_size) { - ret_count = 0; - goto read_rbu_data_exit; + return -ENOMEM; } - bytes_left = rbu_data.bios_image_size - pos; - data_length = min(bytes_left, count); - - ptemp = rbu_data.image_update_buffer; - memcpy(buffer, (ptemp + pos), data_length); - - if ((pos + count) > rbu_data.bios_image_size) - /* this was the last copy */ - ret_count = bytes_left; - else - ret_count = count; - read_rbu_data_exit: - return ret_count; + return memory_read_from_buffer(buffer, count, &pos, + rbu_data.image_update_buffer, rbu_data.bios_image_size); } static ssize_t read_rbu_data(struct kobject *kobj, diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index fced1909cbba..dbd42d6c93a7 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -2,15 +2,40 @@ # GPIO infrastructure and expanders # -config HAVE_GPIO_LIB +config ARCH_WANT_OPTIONAL_GPIOLIB bool help + Select this config option from the architecture Kconfig, if + it is possible to use gpiolib on the architecture, but let the + user decide whether to actually build it or not. + Select this instead of ARCH_REQUIRE_GPIOLIB, if your architecture does + not depend on GPIOs being available, but rather let the user + decide whether he needs it or not. + +config ARCH_REQUIRE_GPIOLIB + bool + select GPIOLIB + help Platforms select gpiolib if they use this infrastructure for all their GPIOs, usually starting with ones integrated into SOC processors. + Selecting this from the architecture code will cause the gpiolib + code to always get built in. + + + +menuconfig GPIOLIB + bool "GPIO Support" + depends on ARCH_WANT_OPTIONAL_GPIOLIB || ARCH_REQUIRE_GPIOLIB + select GENERIC_GPIO + help + This enables GPIO support through the generic GPIO library. + You only need to enable this, if you also want to enable + one or more of the GPIO expansion card drivers below. -menu "GPIO Support" - depends on HAVE_GPIO_LIB + If unsure, say N. + +if GPIOLIB config DEBUG_GPIO bool "Debug GPIO calls" @@ -23,10 +48,44 @@ config DEBUG_GPIO slower. The diagnostics help catch the type of setup errors that are most common when setting up new platforms or boards. +config GPIO_SYSFS + bool "/sys/class/gpio/... (sysfs interface)" + depends on SYSFS && EXPERIMENTAL + help + Say Y here to add a sysfs interface for GPIOs. + + This is mostly useful to work around omissions in a system's + kernel support. Those are common in custom and semicustom + hardware assembled using standard kernels with a minimum of + custom patches. In those cases, userspace code may import + a given GPIO from the kernel, if no kernel driver requested it. + + Kernel drivers may also request that a particular GPIO be + exported to userspace; this can be useful when debugging. + # put expanders in the right section, in alphabetical order comment "I2C GPIO expanders:" +config GPIO_MAX732X + tristate "MAX7319, MAX7320-7327 I2C Port Expanders" + depends on I2C + help + Say yes here to support the MAX7319, MAX7320-7327 series of I2C + Port Expanders. Each IO port on these chips has a fixed role of + Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain + Input and Output (designed by 'P'). The combinations are listed + below: + + 8 bits: max7319 (8I), max7320 (8O), max7321 (8P), + max7322 (4I4O), max7323 (4P4O) + + 16 bits: max7324 (8I8O), max7325 (8P8O), + max7326 (4I12O), max7327 (4P12O) + + Board setup code must specify the model to use, and the start + number for these GPIOs. + config GPIO_PCA953X tristate "PCA953x, PCA955x, and MAX7310 I/O ports" depends on I2C @@ -68,6 +127,24 @@ config GPIO_PCF857X This driver provides an in-kernel interface to those GPIOs using platform-neutral GPIO calls. +comment "PCI GPIO expanders:" + +config GPIO_BT8XX + tristate "BT8XX GPIO abuser" + depends on PCI && VIDEO_BT848=n + help + The BT8xx frame grabber chip has 24 GPIO pins than can be abused + as a cheap PCI GPIO card. + + This chip can be found on Miro, Hauppauge and STB TV-cards. + + The card needs to be physically altered for using it as a + GPIO card. For more information on how to build a GPIO card + from a BT8xx TV card, see the documentation file at + Documentation/bt8xxgpio.txt + + If unsure, say N. + comment "SPI GPIO expanders:" config GPIO_MAX7301 @@ -83,4 +160,4 @@ config GPIO_MCP23S08 SPI driver for Microchip MCP23S08 I/O expander. This provides a GPIO interface supporting inputs and outputs. -endmenu +endif diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 16e796dc5410..01b4bbde1956 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -2,9 +2,11 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG -obj-$(CONFIG_HAVE_GPIO_LIB) += gpiolib.o +obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_MAX7301) += max7301.o +obj-$(CONFIG_GPIO_MAX732X) += max732x.o obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o obj-$(CONFIG_GPIO_PCA953X) += pca953x.o obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o +obj-$(CONFIG_GPIO_BT8XX) += bt8xxgpio.o diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c new file mode 100644 index 000000000000..7a1168249dd5 --- /dev/null +++ b/drivers/gpio/bt8xxgpio.c @@ -0,0 +1,348 @@ +/* + + bt8xx GPIO abuser + + Copyright (C) 2008 Michael Buesch <mb@bu3sch.de> + + Please do _only_ contact the people listed _above_ with issues related to this driver. + All the other people listed below are not related to this driver. Their names + are only here, because this driver is derived from the bt848 driver. + + + Derived from the bt848 driver: + + Copyright (C) 1996,97,98 Ralph Metzler + & Marcus Metzler + (c) 1999-2002 Gerd Knorr + + some v4l2 code lines are taken from Justin's bttv2 driver which is + (c) 2000 Justin Schoeman + + V4L1 removal from: + (c) 2005-2006 Nickolay V. Shmyrev + + Fixes to be fully V4L2 compliant by + (c) 2006 Mauro Carvalho Chehab + + Cropping and overscan support + Copyright (C) 2005, 2006 Michael H. Schimek + Sponsored by OPQ Systems AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/spinlock.h> + +#include <asm/gpio.h> + +/* Steal the hardware definitions from the bttv driver. */ +#include "../media/video/bt8xx/bt848.h" + + +#define BT8XXGPIO_NR_GPIOS 24 /* We have 24 GPIO pins */ + + +struct bt8xxgpio { + spinlock_t lock; + + void __iomem *mmio; + struct pci_dev *pdev; + struct gpio_chip gpio; + +#ifdef CONFIG_PM + u32 saved_outen; + u32 saved_data; +#endif +}; + +#define bgwrite(dat, adr) writel((dat), bg->mmio+(adr)) +#define bgread(adr) readl(bg->mmio+(adr)) + + +static int modparam_gpiobase = -1/* dynamic */; +module_param_named(gpiobase, modparam_gpiobase, int, 0444); +MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, which is the default."); + + +static int bt8xxgpio_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) +{ + struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio); + unsigned long flags; + u32 outen, data; + + spin_lock_irqsave(&bg->lock, flags); + + data = bgread(BT848_GPIO_DATA); + data &= ~(1 << nr); + bgwrite(data, BT848_GPIO_DATA); + + outen = bgread(BT848_GPIO_OUT_EN); + outen &= ~(1 << nr); + bgwrite(outen, BT848_GPIO_OUT_EN); + + spin_unlock_irqrestore(&bg->lock, flags); + + return 0; +} + +static int bt8xxgpio_gpio_get(struct gpio_chip *gpio, unsigned nr) +{ + struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio); + unsigned long flags; + u32 val; + + spin_lock_irqsave(&bg->lock, flags); + val = bgread(BT848_GPIO_DATA); + spin_unlock_irqrestore(&bg->lock, flags); + + return !!(val & (1 << nr)); +} + +static int bt8xxgpio_gpio_direction_output(struct gpio_chip *gpio, + unsigned nr, int val) +{ + struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio); + unsigned long flags; + u32 outen, data; + + spin_lock_irqsave(&bg->lock, flags); + + outen = bgread(BT848_GPIO_OUT_EN); + outen |= (1 << nr); + bgwrite(outen, BT848_GPIO_OUT_EN); + + data = bgread(BT848_GPIO_DATA); + if (val) + data |= (1 << nr); + else + data &= ~(1 << nr); + bgwrite(data, BT848_GPIO_DATA); + + spin_unlock_irqrestore(&bg->lock, flags); + + return 0; +} + +static void bt8xxgpio_gpio_set(struct gpio_chip *gpio, + unsigned nr, int val) +{ + struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio); + unsigned long flags; + u32 data; + + spin_lock_irqsave(&bg->lock, flags); + + data = bgread(BT848_GPIO_DATA); + if (val) + data |= (1 << nr); + else + data &= ~(1 << nr); + bgwrite(data, BT848_GPIO_DATA); + + spin_unlock_irqrestore(&bg->lock, flags); +} + +static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg) +{ + struct gpio_chip *c = &bg->gpio; + + c->label = bg->pdev->dev.bus_id; + c->owner = THIS_MODULE; + c->direction_input = bt8xxgpio_gpio_direction_input; + c->get = bt8xxgpio_gpio_get; + c->direction_output = bt8xxgpio_gpio_direction_output; + c->set = bt8xxgpio_gpio_set; + c->dbg_show = NULL; + c->base = modparam_gpiobase; + c->ngpio = BT8XXGPIO_NR_GPIOS; + c->can_sleep = 0; +} + +static int bt8xxgpio_probe(struct pci_dev *dev, + const struct pci_device_id *pci_id) +{ + struct bt8xxgpio *bg; + int err; + + bg = kzalloc(sizeof(*bg), GFP_KERNEL); + if (!bg) + return -ENOMEM; + + bg->pdev = dev; + spin_lock_init(&bg->lock); + + err = pci_enable_device(dev); + if (err) { + printk(KERN_ERR "bt8xxgpio: Can't enable device.\n"); + goto err_freebg; + } + if (!request_mem_region(pci_resource_start(dev, 0), + pci_resource_len(dev, 0), + "bt8xxgpio")) { + printk(KERN_WARNING "bt8xxgpio: Can't request iomem (0x%llx).\n", + (unsigned long long)pci_resource_start(dev, 0)); + err = -EBUSY; + goto err_disable; + } + pci_set_master(dev); + pci_set_drvdata(dev, bg); + + bg->mmio = ioremap(pci_resource_start(dev, 0), 0x1000); + if (!bg->mmio) { + printk(KERN_ERR "bt8xxgpio: ioremap() failed\n"); + err = -EIO; + goto err_release_mem; + } + + /* Disable interrupts */ + bgwrite(0, BT848_INT_MASK); + + /* gpio init */ + bgwrite(0, BT848_GPIO_DMA_CTL); + bgwrite(0, BT848_GPIO_REG_INP); + bgwrite(0, BT848_GPIO_OUT_EN); + + bt8xxgpio_gpio_setup(bg); + err = gpiochip_add(&bg->gpio); + if (err) { + printk(KERN_ERR "bt8xxgpio: Failed to register GPIOs\n"); + goto err_release_mem; + } + + printk(KERN_INFO "bt8xxgpio: Abusing BT8xx card for GPIOs %d to %d\n", + bg->gpio.base, bg->gpio.base + BT8XXGPIO_NR_GPIOS - 1); + + return 0; + +err_release_mem: + release_mem_region(pci_resource_start(dev, 0), + pci_resource_len(dev, 0)); + pci_set_drvdata(dev, NULL); +err_disable: + pci_disable_device(dev); +err_freebg: + kfree(bg); + + return err; +} + +static void bt8xxgpio_remove(struct pci_dev *pdev) +{ + struct bt8xxgpio *bg = pci_get_drvdata(pdev); + + gpiochip_remove(&bg->gpio); + + bgwrite(0, BT848_INT_MASK); + bgwrite(~0x0, BT848_INT_STAT); + bgwrite(0x0, BT848_GPIO_OUT_EN); + + iounmap(bg->mmio); + release_mem_region(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + pci_disable_device(pdev); + + pci_set_drvdata(pdev, NULL); + kfree(bg); +} + +#ifdef CONFIG_PM +static int bt8xxgpio_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct bt8xxgpio *bg = pci_get_drvdata(pdev); + unsigned long flags; + + spin_lock_irqsave(&bg->lock, flags); + + bg->saved_outen = bgread(BT848_GPIO_OUT_EN); + bg->saved_data = bgread(BT848_GPIO_DATA); + + bgwrite(0, BT848_INT_MASK); + bgwrite(~0x0, BT848_INT_STAT); + bgwrite(0x0, BT848_GPIO_OUT_EN); + + spin_unlock_irqrestore(&bg->lock, flags); + + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int bt8xxgpio_resume(struct pci_dev *pdev) +{ + struct bt8xxgpio *bg = pci_get_drvdata(pdev); + unsigned long flags; + int err; + + pci_set_power_state(pdev, 0); + err = pci_enable_device(pdev); + if (err) + return err; + pci_restore_state(pdev); + + spin_lock_irqsave(&bg->lock, flags); + + bgwrite(0, BT848_INT_MASK); + bgwrite(0, BT848_GPIO_DMA_CTL); + bgwrite(0, BT848_GPIO_REG_INP); + bgwrite(bg->saved_outen, BT848_GPIO_OUT_EN); + bgwrite(bg->saved_data & bg->saved_outen, + BT848_GPIO_DATA); + + spin_unlock_irqrestore(&bg->lock, flags); + + return 0; +} +#else +#define bt8xxgpio_suspend NULL +#define bt8xxgpio_resume NULL +#endif /* CONFIG_PM */ + +static struct pci_device_id bt8xxgpio_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT848) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT849) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT878) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT879) }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, bt8xxgpio_pci_tbl); + +static struct pci_driver bt8xxgpio_pci_driver = { + .name = "bt8xxgpio", + .id_table = bt8xxgpio_pci_tbl, + .probe = bt8xxgpio_probe, + .remove = bt8xxgpio_remove, + .suspend = bt8xxgpio_suspend, + .resume = bt8xxgpio_resume, +}; + +static int bt8xxgpio_init(void) +{ + return pci_register_driver(&bt8xxgpio_pci_driver); +} +module_init(bt8xxgpio_init) + +static void bt8xxgpio_exit(void) +{ + pci_unregister_driver(&bt8xxgpio_pci_driver); +} +module_exit(bt8xxgpio_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Buesch"); +MODULE_DESCRIPTION("Abuse a BT8xx framegrabber card as generic GPIO card"); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index beaf6b3a37dc..8d2940517c99 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2,8 +2,11 @@ #include <linux/module.h> #include <linux/irq.h> #include <linux/spinlock.h> - -#include <asm/gpio.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/gpio.h> /* Optional implementation infrastructure for GPIO interfaces. @@ -44,6 +47,8 @@ struct gpio_desc { #define FLAG_REQUESTED 0 #define FLAG_IS_OUT 1 #define FLAG_RESERVED 2 +#define FLAG_EXPORT 3 /* protected by sysfs_lock */ +#define FLAG_SYSFS 4 /* exported via /sys/class/gpio/control */ #ifdef CONFIG_DEBUG_FS const char *label; @@ -151,6 +156,482 @@ err: return ret; } +#ifdef CONFIG_GPIO_SYSFS + +/* lock protects against unexport_gpio() being called while + * sysfs files are active. + */ +static DEFINE_MUTEX(sysfs_lock); + +/* + * /sys/class/gpio/gpioN... only for GPIOs that are exported + * /direction + * * MAY BE OMITTED if kernel won't allow direction changes + * * is read/write as "in" or "out" + * * may also be written as "high" or "low", initializing + * output value as specified ("out" implies "low") + * /value + * * always readable, subject to hardware behavior + * * may be writable, as zero/nonzero + * + * REVISIT there will likely be an attribute for configuring async + * notifications, e.g. to specify polling interval or IRQ trigger type + * that would for example trigger a poll() on the "value". + */ + +static ssize_t gpio_direction_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else + status = sprintf(buf, "%s\n", + test_bit(FLAG_IS_OUT, &desc->flags) + ? "out" : "in"); + + mutex_unlock(&sysfs_lock); + return status; +} + +static ssize_t gpio_direction_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + unsigned gpio = desc - gpio_desc; + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else if (sysfs_streq(buf, "high")) + status = gpio_direction_output(gpio, 1); + else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low")) + status = gpio_direction_output(gpio, 0); + else if (sysfs_streq(buf, "in")) + status = gpio_direction_input(gpio); + else + status = -EINVAL; + + mutex_unlock(&sysfs_lock); + return status ? : size; +} + +static const DEVICE_ATTR(direction, 0644, + gpio_direction_show, gpio_direction_store); + +static ssize_t gpio_value_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + unsigned gpio = desc - gpio_desc; + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else + status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio)); + + mutex_unlock(&sysfs_lock); + return status; +} + +static ssize_t gpio_value_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + unsigned gpio = desc - gpio_desc; + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else if (!test_bit(FLAG_IS_OUT, &desc->flags)) + status = -EPERM; + else { + long value; + + status = strict_strtol(buf, 0, &value); + if (status == 0) { + gpio_set_value_cansleep(gpio, value != 0); + status = size; + } + } + + mutex_unlock(&sysfs_lock); + return status; +} + +static /*const*/ DEVICE_ATTR(value, 0644, + gpio_value_show, gpio_value_store); + +static const struct attribute *gpio_attrs[] = { + &dev_attr_direction.attr, + &dev_attr_value.attr, + NULL, +}; + +static const struct attribute_group gpio_attr_group = { + .attrs = (struct attribute **) gpio_attrs, +}; + +/* + * /sys/class/gpio/gpiochipN/ + * /base ... matching gpio_chip.base (N) + * /label ... matching gpio_chip.label + * /ngpio ... matching gpio_chip.ngpio + */ + +static ssize_t chip_base_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->base); +} +static DEVICE_ATTR(base, 0444, chip_base_show, NULL); + +static ssize_t chip_label_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", chip->label ? : ""); +} +static DEVICE_ATTR(label, 0444, chip_label_show, NULL); + +static ssize_t chip_ngpio_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->ngpio); +} +static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL); + +static const struct attribute *gpiochip_attrs[] = { + &dev_attr_base.attr, + &dev_attr_label.attr, + &dev_attr_ngpio.attr, + NULL, +}; + +static const struct attribute_group gpiochip_attr_group = { + .attrs = (struct attribute **) gpiochip_attrs, +}; + +/* + * /sys/class/gpio/export ... write-only + * integer N ... number of GPIO to export (full access) + * /sys/class/gpio/unexport ... write-only + * integer N ... number of GPIO to unexport + */ +static ssize_t export_store(struct class *class, const char *buf, size_t len) +{ + long gpio; + int status; + + status = strict_strtol(buf, 0, &gpio); + if (status < 0) + goto done; + + /* No extra locking here; FLAG_SYSFS just signifies that the + * request and export were done by on behalf of userspace, so + * they may be undone on its behalf too. + */ + + status = gpio_request(gpio, "sysfs"); + if (status < 0) + goto done; + + status = gpio_export(gpio, true); + if (status < 0) + gpio_free(gpio); + else + set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags); + +done: + if (status) + pr_debug("%s: status %d\n", __func__, status); + return status ? : len; +} + +static ssize_t unexport_store(struct class *class, const char *buf, size_t len) +{ + long gpio; + int status; + + status = strict_strtol(buf, 0, &gpio); + if (status < 0) + goto done; + + status = -EINVAL; + + /* reject bogus commands (gpio_unexport ignores them) */ + if (!gpio_is_valid(gpio)) + goto done; + + /* No extra locking here; FLAG_SYSFS just signifies that the + * request and export were done by on behalf of userspace, so + * they may be undone on its behalf too. + */ + if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) { + status = 0; + gpio_free(gpio); + } +done: + if (status) + pr_debug("%s: status %d\n", __func__, status); + return status ? : len; +} + +static struct class_attribute gpio_class_attrs[] = { + __ATTR(export, 0200, NULL, export_store), + __ATTR(unexport, 0200, NULL, unexport_store), + __ATTR_NULL, +}; + +static struct class gpio_class = { + .name = "gpio", + .owner = THIS_MODULE, + + .class_attrs = gpio_class_attrs, +}; + + +/** + * gpio_export - export a GPIO through sysfs + * @gpio: gpio to make available, already requested + * @direction_may_change: true if userspace may change gpio direction + * Context: arch_initcall or later + * + * When drivers want to make a GPIO accessible to userspace after they + * have requested it -- perhaps while debugging, or as part of their + * public interface -- they may use this routine. If the GPIO can + * change direction (some can't) and the caller allows it, userspace + * will see "direction" sysfs attribute which may be used to change + * the gpio's direction. A "value" attribute will always be provided. + * + * Returns zero on success, else an error. + */ +int gpio_export(unsigned gpio, bool direction_may_change) +{ + unsigned long flags; + struct gpio_desc *desc; + int status = -EINVAL; + + /* can't export until sysfs is available ... */ + if (!gpio_class.p) { + pr_debug("%s: called too early!\n", __func__); + return -ENOENT; + } + + if (!gpio_is_valid(gpio)) + goto done; + + mutex_lock(&sysfs_lock); + + spin_lock_irqsave(&gpio_lock, flags); + desc = &gpio_desc[gpio]; + if (test_bit(FLAG_REQUESTED, &desc->flags) + && !test_bit(FLAG_EXPORT, &desc->flags)) { + status = 0; + if (!desc->chip->direction_input + || !desc->chip->direction_output) + direction_may_change = false; + } + spin_unlock_irqrestore(&gpio_lock, flags); + + if (status == 0) { + struct device *dev; + + dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0), + desc, "gpio%d", gpio); + if (dev) { + if (direction_may_change) + status = sysfs_create_group(&dev->kobj, + &gpio_attr_group); + else + status = device_create_file(dev, + &dev_attr_value); + if (status != 0) + device_unregister(dev); + } else + status = -ENODEV; + if (status == 0) + set_bit(FLAG_EXPORT, &desc->flags); + } + + mutex_unlock(&sysfs_lock); + +done: + if (status) + pr_debug("%s: gpio%d status %d\n", __func__, gpio, status); + + return status; +} +EXPORT_SYMBOL_GPL(gpio_export); + +static int match_export(struct device *dev, void *data) +{ + return dev_get_drvdata(dev) == data; +} + +/** + * gpio_unexport - reverse effect of gpio_export() + * @gpio: gpio to make unavailable + * + * This is implicit on gpio_free(). + */ +void gpio_unexport(unsigned gpio) +{ + struct gpio_desc *desc; + int status = -EINVAL; + + if (!gpio_is_valid(gpio)) + goto done; + + mutex_lock(&sysfs_lock); + + desc = &gpio_desc[gpio]; + if (test_bit(FLAG_EXPORT, &desc->flags)) { + struct device *dev = NULL; + + dev = class_find_device(&gpio_class, NULL, desc, match_export); + if (dev) { + clear_bit(FLAG_EXPORT, &desc->flags); + put_device(dev); + device_unregister(dev); + status = 0; + } else + status = -ENODEV; + } + + mutex_unlock(&sysfs_lock); +done: + if (status) + pr_debug("%s: gpio%d status %d\n", __func__, gpio, status); +} +EXPORT_SYMBOL_GPL(gpio_unexport); + +static int gpiochip_export(struct gpio_chip *chip) +{ + int status; + struct device *dev; + + /* Many systems register gpio chips for SOC support very early, + * before driver model support is available. In those cases we + * export this later, in gpiolib_sysfs_init() ... here we just + * verify that _some_ field of gpio_class got initialized. + */ + if (!gpio_class.p) + return 0; + + /* use chip->base for the ID; it's already known to be unique */ + mutex_lock(&sysfs_lock); + dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip, + "gpiochip%d", chip->base); + if (dev) { + status = sysfs_create_group(&dev->kobj, + &gpiochip_attr_group); + } else + status = -ENODEV; + chip->exported = (status == 0); + mutex_unlock(&sysfs_lock); + + if (status) { + unsigned long flags; + unsigned gpio; + + spin_lock_irqsave(&gpio_lock, flags); + gpio = chip->base; + while (gpio_desc[gpio].chip == chip) + gpio_desc[gpio++].chip = NULL; + spin_unlock_irqrestore(&gpio_lock, flags); + + pr_debug("%s: chip %s status %d\n", __func__, + chip->label, status); + } + + return status; +} + +static void gpiochip_unexport(struct gpio_chip *chip) +{ + int status; + struct device *dev; + + mutex_lock(&sysfs_lock); + dev = class_find_device(&gpio_class, NULL, chip, match_export); + if (dev) { + put_device(dev); + device_unregister(dev); + chip->exported = 0; + status = 0; + } else + status = -ENODEV; + mutex_unlock(&sysfs_lock); + + if (status) + pr_debug("%s: chip %s status %d\n", __func__, + chip->label, status); +} + +static int __init gpiolib_sysfs_init(void) +{ + int status; + unsigned long flags; + unsigned gpio; + + status = class_register(&gpio_class); + if (status < 0) + return status; + + /* Scan and register the gpio_chips which registered very + * early (e.g. before the class_register above was called). + * + * We run before arch_initcall() so chip->dev nodes can have + * registered, and so arch_initcall() can always gpio_export(). + */ + spin_lock_irqsave(&gpio_lock, flags); + for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) { + struct gpio_chip *chip; + + chip = gpio_desc[gpio].chip; + if (!chip || chip->exported) + continue; + + spin_unlock_irqrestore(&gpio_lock, flags); + status = gpiochip_export(chip); + spin_lock_irqsave(&gpio_lock, flags); + } + spin_unlock_irqrestore(&gpio_lock, flags); + + + return status; +} +postcore_initcall(gpiolib_sysfs_init); + +#else +static inline int gpiochip_export(struct gpio_chip *chip) +{ + return 0; +} + +static inline void gpiochip_unexport(struct gpio_chip *chip) +{ +} + +#endif /* CONFIG_GPIO_SYSFS */ + /** * gpiochip_add() - register a gpio_chip * @chip: the chip to register, with chip->base initialized @@ -160,6 +641,11 @@ err: * because the chip->base is invalid or already associated with a * different chip. Otherwise it returns zero as a success code. * + * When gpiochip_add() is called very early during boot, so that GPIOs + * can be freely used, the chip->dev device must be registered before + * the gpio framework's arch_initcall(). Otherwise sysfs initialization + * for GPIOs will fail rudely. + * * If chip->base is negative, this requests dynamic assignment of * a range of valid GPIOs. */ @@ -182,7 +668,7 @@ int gpiochip_add(struct gpio_chip *chip) base = gpiochip_find_base(chip->ngpio); if (base < 0) { status = base; - goto fail_unlock; + goto unlock; } chip->base = base; } @@ -197,12 +683,23 @@ int gpiochip_add(struct gpio_chip *chip) if (status == 0) { for (id = base; id < base + chip->ngpio; id++) { gpio_desc[id].chip = chip; - gpio_desc[id].flags = 0; + + /* REVISIT: most hardware initializes GPIOs as + * inputs (often with pullups enabled) so power + * usage is minimized. Linux code should set the + * gpio direction first thing; but until it does, + * we may expose the wrong direction in sysfs. + */ + gpio_desc[id].flags = !chip->direction_input + ? (1 << FLAG_IS_OUT) + : 0; } } -fail_unlock: +unlock: spin_unlock_irqrestore(&gpio_lock, flags); + if (status == 0) + status = gpiochip_export(chip); fail: /* failures here can mean systems won't boot... */ if (status) @@ -239,6 +736,10 @@ int gpiochip_remove(struct gpio_chip *chip) } spin_unlock_irqrestore(&gpio_lock, flags); + + if (status == 0) + gpiochip_unexport(chip); + return status; } EXPORT_SYMBOL_GPL(gpiochip_remove); @@ -296,6 +797,8 @@ void gpio_free(unsigned gpio) return; } + gpio_unexport(gpio); + spin_lock_irqsave(&gpio_lock, flags); desc = &gpio_desc[gpio]; @@ -534,10 +1037,6 @@ EXPORT_SYMBOL_GPL(gpio_set_value_cansleep); #ifdef CONFIG_DEBUG_FS -#include <linux/debugfs.h> -#include <linux/seq_file.h> - - static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip) { unsigned i; @@ -614,17 +1113,28 @@ static int gpiolib_show(struct seq_file *s, void *unused) /* REVISIT this isn't locked against gpio_chip removal ... */ for (gpio = 0; gpio_is_valid(gpio); gpio++) { + struct device *dev; + if (chip == gpio_desc[gpio].chip) continue; chip = gpio_desc[gpio].chip; if (!chip) continue; - seq_printf(s, "%sGPIOs %d-%d, %s%s:\n", + seq_printf(s, "%sGPIOs %d-%d", started ? "\n" : "", - chip->base, chip->base + chip->ngpio - 1, - chip->label ? : "generic", - chip->can_sleep ? ", can sleep" : ""); + chip->base, chip->base + chip->ngpio - 1); + dev = chip->dev; + if (dev) + seq_printf(s, ", %s/%s", + dev->bus ? dev->bus->name : "no-bus", + dev->bus_id); + if (chip->label) + seq_printf(s, ", %s", chip->label); + if (chip->can_sleep) + seq_printf(s, ", can sleep"); + seq_printf(s, ":\n"); + started = 1; if (chip->dbg_show) chip->dbg_show(s, chip); diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c new file mode 100644 index 000000000000..b51c8135ca28 --- /dev/null +++ b/drivers/gpio/max732x.c @@ -0,0 +1,385 @@ +/* + * max732x.c - I2C Port Expander with 8/16 I/O + * + * Copyright (C) 2007 Marvell International Ltd. + * Copyright (C) 2008 Jack Ren <jack.ren@marvell.com> + * Copyright (C) 2008 Eric Miao <eric.miao@marvell.com> + * + * Derived from drivers/gpio/pca953x.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/gpio.h> + +#include <linux/i2c.h> +#include <linux/i2c/max732x.h> + + +/* + * Each port of MAX732x (including MAX7319) falls into one of the + * following three types: + * + * - Push Pull Output + * - Input + * - Open Drain I/O + * + * designated by 'O', 'I' and 'P' individually according to MAXIM's + * datasheets. + * + * There are two groups of I/O ports, each group usually includes + * up to 8 I/O ports, and is accessed by a specific I2C address: + * + * - Group A : by I2C address 0b'110xxxx + * - Group B : by I2C address 0b'101xxxx + * + * where 'xxxx' is decided by the connections of pin AD2/AD0. The + * address used also affects the initial state of output signals. + * + * Within each group of ports, there are five known combinations of + * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for + * the detailed organization of these ports. + * + * GPIO numbers start from 'gpio_base + 0' to 'gpio_base + 8/16', + * and GPIOs from GROUP_A are numbered before those from GROUP_B + * (if there are two groups). + * + * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so + * they are not supported by this driver. + */ + +#define PORT_NONE 0x0 /* '/' No Port */ +#define PORT_OUTPUT 0x1 /* 'O' Push-Pull, Output Only */ +#define PORT_INPUT 0x2 /* 'I' Input Only */ +#define PORT_OPENDRAIN 0x3 /* 'P' Open-Drain, I/O */ + +#define IO_4I4O 0x5AA5 /* O7 O6 I5 I4 I3 I2 O1 O0 */ +#define IO_4P4O 0x5FF5 /* O7 O6 P5 P4 P3 P2 O1 O0 */ +#define IO_8I 0xAAAA /* I7 I6 I5 I4 I3 I2 I1 I0 */ +#define IO_8P 0xFFFF /* P7 P6 P5 P4 P3 P2 P1 P0 */ +#define IO_8O 0x5555 /* O7 O6 O5 O4 O3 O2 O1 O0 */ + +#define GROUP_A(x) ((x) & 0xffff) /* I2C Addr: 0b'110xxxx */ +#define GROUP_B(x) ((x) << 16) /* I2C Addr: 0b'101xxxx */ + +static const struct i2c_device_id max732x_id[] = { + { "max7319", GROUP_A(IO_8I) }, + { "max7320", GROUP_B(IO_8O) }, + { "max7321", GROUP_A(IO_8P) }, + { "max7322", GROUP_A(IO_4I4O) }, + { "max7323", GROUP_A(IO_4P4O) }, + { "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) }, + { "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) }, + { "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) }, + { "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, max732x_id); + +struct max732x_chip { + struct gpio_chip gpio_chip; + + struct i2c_client *client; /* "main" client */ + struct i2c_client *client_dummy; + struct i2c_client *client_group_a; + struct i2c_client *client_group_b; + + unsigned int mask_group_a; + unsigned int dir_input; + unsigned int dir_output; + + struct mutex lock; + uint8_t reg_out[2]; +}; + +static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val) +{ + struct i2c_client *client; + int ret; + + client = group_a ? chip->client_group_a : chip->client_group_b; + ret = i2c_smbus_write_byte(client, val); + if (ret < 0) { + dev_err(&client->dev, "failed writing\n"); + return ret; + } + + return 0; +} + +static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val) +{ + struct i2c_client *client; + int ret; + + client = group_a ? chip->client_group_a : chip->client_group_b; + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_err(&client->dev, "failed reading\n"); + return ret; + } + + *val = (uint8_t)ret; + return 0; +} + +static inline int is_group_a(struct max732x_chip *chip, unsigned off) +{ + return (1u << off) & chip->mask_group_a; +} + +static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off) +{ + struct max732x_chip *chip; + uint8_t reg_val; + int ret; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + ret = max732x_read(chip, is_group_a(chip, off), ®_val); + if (ret < 0) + return 0; + + return reg_val & (1u << (off & 0x7)); +} + +static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val) +{ + struct max732x_chip *chip; + uint8_t reg_out, mask = 1u << (off & 0x7); + int ret; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + mutex_lock(&chip->lock); + + reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0]; + reg_out = (val) ? reg_out | mask : reg_out & ~mask; + + ret = max732x_write(chip, is_group_a(chip, off), reg_out); + if (ret < 0) + goto out; + + /* update the shadow register then */ + if (off > 7) + chip->reg_out[1] = reg_out; + else + chip->reg_out[0] = reg_out; +out: + mutex_unlock(&chip->lock); +} + +static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off) +{ + struct max732x_chip *chip; + unsigned int mask = 1u << off; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + if ((mask & chip->dir_input) == 0) { + dev_dbg(&chip->client->dev, "%s port %d is output only\n", + chip->client->name, off); + return -EACCES; + } + + return 0; +} + +static int max732x_gpio_direction_output(struct gpio_chip *gc, + unsigned off, int val) +{ + struct max732x_chip *chip; + unsigned int mask = 1u << off; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + if ((mask & chip->dir_output) == 0) { + dev_dbg(&chip->client->dev, "%s port %d is input only\n", + chip->client->name, off); + return -EACCES; + } + + max732x_gpio_set_value(gc, off, val); + return 0; +} + +static int __devinit max732x_setup_gpio(struct max732x_chip *chip, + const struct i2c_device_id *id, + unsigned gpio_start) +{ + struct gpio_chip *gc = &chip->gpio_chip; + uint32_t id_data = id->driver_data; + int i, port = 0; + + for (i = 0; i < 16; i++, id_data >>= 2) { + unsigned int mask = 1 << port; + + switch (id_data & 0x3) { + case PORT_OUTPUT: + chip->dir_output |= mask; + break; + case PORT_INPUT: + chip->dir_input |= mask; + break; + case PORT_OPENDRAIN: + chip->dir_output |= mask; + chip->dir_input |= mask; + break; + default: + continue; + } + + if (i < 8) + chip->mask_group_a |= mask; + port++; + } + + if (chip->dir_input) + gc->direction_input = max732x_gpio_direction_input; + if (chip->dir_output) { + gc->direction_output = max732x_gpio_direction_output; + gc->set = max732x_gpio_set_value; + } + gc->get = max732x_gpio_get_value; + gc->can_sleep = 1; + + gc->base = gpio_start; + gc->ngpio = port; + gc->label = chip->client->name; + gc->owner = THIS_MODULE; + + return port; +} + +static int __devinit max732x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max732x_platform_data *pdata; + struct max732x_chip *chip; + struct i2c_client *c; + uint16_t addr_a, addr_b; + int ret, nr_port; + + pdata = client->dev.platform_data; + if (pdata == NULL) + return -ENODEV; + + chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + chip->client = client; + + nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base); + + addr_a = (client->addr & 0x0f) | 0x60; + addr_b = (client->addr & 0x0f) | 0x50; + + switch (client->addr & 0x70) { + case 0x60: + chip->client_group_a = client; + if (nr_port > 7) { + c = i2c_new_dummy(client->adapter, addr_b); + chip->client_group_b = chip->client_dummy = c; + } + break; + case 0x50: + chip->client_group_b = client; + if (nr_port > 7) { + c = i2c_new_dummy(client->adapter, addr_a); + chip->client_group_a = chip->client_dummy = c; + } + break; + default: + dev_err(&client->dev, "invalid I2C address specified %02x\n", + client->addr); + ret = -EINVAL; + goto out_failed; + } + + mutex_init(&chip->lock); + + max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]); + if (nr_port > 7) + max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]); + + ret = gpiochip_add(&chip->gpio_chip); + if (ret) + goto out_failed; + + if (pdata->setup) { + ret = pdata->setup(client, chip->gpio_chip.base, + chip->gpio_chip.ngpio, pdata->context); + if (ret < 0) + dev_warn(&client->dev, "setup failed, %d\n", ret); + } + + i2c_set_clientdata(client, chip); + return 0; + +out_failed: + kfree(chip); + return ret; +} + +static int __devexit max732x_remove(struct i2c_client *client) +{ + struct max732x_platform_data *pdata = client->dev.platform_data; + struct max732x_chip *chip = i2c_get_clientdata(client); + int ret; + + if (pdata->teardown) { + ret = pdata->teardown(client, chip->gpio_chip.base, + chip->gpio_chip.ngpio, pdata->context); + if (ret < 0) { + dev_err(&client->dev, "%s failed, %d\n", + "teardown", ret); + return ret; + } + } + + ret = gpiochip_remove(&chip->gpio_chip); + if (ret) { + dev_err(&client->dev, "%s failed, %d\n", + "gpiochip_remove()", ret); + return ret; + } + + /* unregister any dummy i2c_client */ + if (chip->client_dummy) + i2c_unregister_device(chip->client_dummy); + + kfree(chip); + return 0; +} + +static struct i2c_driver max732x_driver = { + .driver = { + .name = "max732x", + .owner = THIS_MODULE, + }, + .probe = max732x_probe, + .remove = __devexit_p(max732x_remove), + .id_table = max732x_id, +}; + +static int __init max732x_init(void) +{ + return i2c_add_driver(&max732x_driver); +} +module_init(max732x_init); + +static void __exit max732x_exit(void) +{ + i2c_del_driver(&max732x_driver); +} +module_exit(max732x_exit); + +MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>"); +MODULE_DESCRIPTION("GPIO expander driver for MAX732X"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c index 7f92fdd5f0e2..8a1b405fefda 100644 --- a/drivers/gpio/mcp23s08.c +++ b/drivers/gpio/mcp23s08.c @@ -40,15 +40,26 @@ struct mcp23s08 { struct spi_device *spi; u8 addr; + u8 cache[11]; /* lock protects the cached values */ struct mutex lock; - u8 cache[11]; struct gpio_chip chip; struct work_struct work; }; +/* A given spi_device can represent up to four mcp23s08 chips + * sharing the same chipselect but using different addresses + * (e.g. chips #0 and #3 might be populated, but not #1 or $2). + * Driver data holds all the per-chip data. + */ +struct mcp23s08_driver_data { + unsigned ngpio; + struct mcp23s08 *mcp[4]; + struct mcp23s08 chip[]; +}; + static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg) { u8 tx[2], rx[1]; @@ -208,25 +219,18 @@ done: /*----------------------------------------------------------------------*/ -static int mcp23s08_probe(struct spi_device *spi) +static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr, + unsigned base, unsigned pullups) { - struct mcp23s08 *mcp; - struct mcp23s08_platform_data *pdata; + struct mcp23s08_driver_data *data = spi_get_drvdata(spi); + struct mcp23s08 *mcp = data->mcp[addr]; int status; int do_update = 0; - pdata = spi->dev.platform_data; - if (!pdata || pdata->slave > 3 || !pdata->base) - return -ENODEV; - - mcp = kzalloc(sizeof *mcp, GFP_KERNEL); - if (!mcp) - return -ENOMEM; - mutex_init(&mcp->lock); mcp->spi = spi; - mcp->addr = 0x40 | (pdata->slave << 1); + mcp->addr = 0x40 | (addr << 1); mcp->chip.label = "mcp23s08", @@ -236,26 +240,28 @@ static int mcp23s08_probe(struct spi_device *spi) mcp->chip.set = mcp23s08_set; mcp->chip.dbg_show = mcp23s08_dbg_show; - mcp->chip.base = pdata->base; + mcp->chip.base = base; mcp->chip.ngpio = 8; mcp->chip.can_sleep = 1; + mcp->chip.dev = &spi->dev; mcp->chip.owner = THIS_MODULE; - spi_set_drvdata(spi, mcp); - - /* verify MCP_IOCON.SEQOP = 0, so sequential reads work */ + /* verify MCP_IOCON.SEQOP = 0, so sequential reads work, + * and MCP_IOCON.HAEN = 1, so we work with all chips. + */ status = mcp23s08_read(mcp, MCP_IOCON); if (status < 0) goto fail; - if (status & IOCON_SEQOP) { + if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) { status &= ~IOCON_SEQOP; + status |= IOCON_HAEN; status = mcp23s08_write(mcp, MCP_IOCON, (u8) status); if (status < 0) goto fail; } /* configure ~100K pullups */ - status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups); + status = mcp23s08_write(mcp, MCP_GPPU, pullups); if (status < 0) goto fail; @@ -282,11 +288,58 @@ static int mcp23s08_probe(struct spi_device *spi) tx[1] = MCP_IPOL; memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2); status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0); - - /* FIXME check status... */ + if (status < 0) + goto fail; } status = gpiochip_add(&mcp->chip); +fail: + if (status < 0) + dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n", + addr, status); + return status; +} + +static int mcp23s08_probe(struct spi_device *spi) +{ + struct mcp23s08_platform_data *pdata; + unsigned addr; + unsigned chips = 0; + struct mcp23s08_driver_data *data; + int status; + unsigned base; + + pdata = spi->dev.platform_data; + if (!pdata || !gpio_is_valid(pdata->base)) + return -ENODEV; + + for (addr = 0; addr < 4; addr++) { + if (!pdata->chip[addr].is_present) + continue; + chips++; + } + if (!chips) + return -ENODEV; + + data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08), + GFP_KERNEL); + if (!data) + return -ENOMEM; + spi_set_drvdata(spi, data); + + base = pdata->base; + for (addr = 0; addr < 4; addr++) { + if (!pdata->chip[addr].is_present) + continue; + chips--; + data->mcp[addr] = &data->chip[chips]; + status = mcp23s08_probe_one(spi, addr, base, + pdata->chip[addr].pullups); + if (status < 0) + goto fail; + base += 8; + } + data->ngpio = base - pdata->base; /* NOTE: these chips have a relatively sane IRQ framework, with * per-signal masking and level/edge triggering. It's not yet @@ -294,8 +347,9 @@ static int mcp23s08_probe(struct spi_device *spi) */ if (pdata->setup) { - status = pdata->setup(spi, mcp->chip.base, - mcp->chip.ngpio, pdata->context); + status = pdata->setup(spi, + pdata->base, data->ngpio, + pdata->context); if (status < 0) dev_dbg(&spi->dev, "setup --> %d\n", status); } @@ -303,19 +357,29 @@ static int mcp23s08_probe(struct spi_device *spi) return 0; fail: - kfree(mcp); + for (addr = 0; addr < 4; addr++) { + int tmp; + + if (!data->mcp[addr]) + continue; + tmp = gpiochip_remove(&data->mcp[addr]->chip); + if (tmp < 0) + dev_err(&spi->dev, "%s --> %d\n", "remove", tmp); + } + kfree(data); return status; } static int mcp23s08_remove(struct spi_device *spi) { - struct mcp23s08 *mcp = spi_get_drvdata(spi); + struct mcp23s08_driver_data *data = spi_get_drvdata(spi); struct mcp23s08_platform_data *pdata = spi->dev.platform_data; + unsigned addr; int status = 0; if (pdata->teardown) { status = pdata->teardown(spi, - mcp->chip.base, mcp->chip.ngpio, + pdata->base, data->ngpio, pdata->context); if (status < 0) { dev_err(&spi->dev, "%s --> %d\n", "teardown", status); @@ -323,11 +387,20 @@ static int mcp23s08_remove(struct spi_device *spi) } } - status = gpiochip_remove(&mcp->chip); + for (addr = 0; addr < 4; addr++) { + int tmp; + + if (!data->mcp[addr]) + continue; + + tmp = gpiochip_remove(&data->mcp[addr]->chip); + if (tmp < 0) { + dev_err(&spi->dev, "%s --> %d\n", "remove", tmp); + status = tmp; + } + } if (status == 0) - kfree(mcp); - else - dev_err(&spi->dev, "%s --> %d\n", "remove", status); + kfree(data); return status; } @@ -355,4 +428,3 @@ static void __exit mcp23s08_exit(void) module_exit(mcp23s08_exit); MODULE_LICENSE("GPL"); - diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index a380730b61ab..cc8468692ae0 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -188,6 +188,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->base = chip->gpio_start; gc->ngpio = gpios; gc->label = chip->client->name; + gc->dev = &chip->client->dev; gc->owner = THIS_MODULE; } diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c index d25d356c4f20..fc9c6ae739ee 100644 --- a/drivers/gpio/pcf857x.c +++ b/drivers/gpio/pcf857x.c @@ -200,6 +200,7 @@ static int pcf857x_probe(struct i2c_client *client, gpio->chip.base = pdata->gpio_base; gpio->chip.can_sleep = 1; + gpio->chip.dev = &client->dev; gpio->chip.owner = THIS_MODULE; /* NOTE: the OnSemi jlc1562b is also largely compatible with diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig index 50e0a4653741..a95cb9465d65 100644 --- a/drivers/i2c/chips/Kconfig +++ b/drivers/i2c/chips/Kconfig @@ -126,7 +126,7 @@ config ISP1301_OMAP config TPS65010 tristate "TPS6501x Power Management chips" - depends on HAVE_GPIO_LIB + depends on GPIOLIB default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK help If you say yes here you get support for the TPS6501x series of diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 85949685191b..cf02e8fceb42 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -636,6 +636,8 @@ static int tps65010_probe(struct i2c_client *client, tps->outmask = board->outmask; tps->chip.label = client->name; + tps->chip.dev = &client->dev; + tps->chip.owner = THIS_MODULE; tps->chip.set = tps65010_gpio_set; tps->chip.direction_output = tps65010_output; diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index aad664d5259f..0d395979b2d1 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -70,7 +70,6 @@ #include <linux/semaphore.h> #include <linux/slab.h> #include <linux/hil.h> -#include <linux/semaphore.h> #include <asm/io.h> #include <asm/system.h> diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h index 2044e7173ab4..cff7a6354334 100644 --- a/drivers/isdn/hisax/st5481.h +++ b/drivers/isdn/hisax/st5481.h @@ -220,7 +220,7 @@ enum { #define ERR(format, arg...) \ printk(KERN_ERR "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) -#define WARN(format, arg...) \ +#define WARNING(format, arg...) \ printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) #define INFO(format, arg...) \ @@ -412,7 +412,7 @@ struct st5481_adapter { ({ \ int status; \ if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \ - WARN("usb_submit_urb failed,status=%d", status); \ + WARNING("usb_submit_urb failed,status=%d", status); \ } \ status; \ }) diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index fa64115cd7c7..0074b600a0ef 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -180,7 +180,7 @@ static void usb_b_out_complete(struct urb *urb) DBG(4,"urb killed status %d", urb->status); return; // Give up default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); if (b_out->busy == 0) { st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL); } @@ -372,6 +372,6 @@ void st5481_b_l2l1(struct hisax_if *ifc, int pr, void *arg) B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL); break; default: - WARN("pr %#x\n", pr); + WARNING("pr %#x\n", pr); } } diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c index b8c4855cc889..077991c1cd05 100644 --- a/drivers/isdn/hisax/st5481_d.c +++ b/drivers/isdn/hisax/st5481_d.c @@ -389,7 +389,7 @@ static void usb_d_out_complete(struct urb *urb) DBG(1,"urb killed status %d", urb->status); break; default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); if (d_out->busy == 0) { st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter); } @@ -420,7 +420,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg) isdnhdlc_out_init(&d_out->hdlc_state, 1, 0); if (test_and_set_bit(buf_nr, &d_out->busy)) { - WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy); + WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy); return; } urb = d_out->urb[buf_nr]; @@ -601,7 +601,7 @@ void st5481_d_l2l1(struct hisax_if *hisax_d_if, int pr, void *arg) FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL); break; default: - WARN("pr %#x\n", pr); + WARNING("pr %#x\n", pr); break; } } diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c index 427a8b0520f5..ec3c0e507669 100644 --- a/drivers/isdn/hisax/st5481_usb.c +++ b/drivers/isdn/hisax/st5481_usb.c @@ -66,7 +66,7 @@ static void usb_ctrl_msg(struct st5481_adapter *adapter, struct ctrl_msg *ctrl_msg; if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) { - WARN("control msg FIFO full"); + WARNING("control msg FIFO full"); return; } ctrl_msg = &ctrl->msg_fifo.data[w_index]; @@ -139,7 +139,7 @@ static void usb_ctrl_complete(struct urb *urb) DBG(1,"urb killed status %d", urb->status); return; // Give up default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); break; } } @@ -198,7 +198,7 @@ static void usb_int_complete(struct urb *urb) DBG(2, "urb shutting down with status: %d", urb->status); return; default: - WARN("nonzero urb status received: %d", urb->status); + WARNING("nonzero urb status received: %d", urb->status); goto exit; } @@ -235,7 +235,7 @@ static void usb_int_complete(struct urb *urb) exit: status = usb_submit_urb (urb, GFP_ATOMIC); if (status) - WARN("usb_submit_urb failed with result %d", status); + WARNING("usb_submit_urb failed with result %d", status); } /* ====================================================================== @@ -257,7 +257,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter) DBG(2,""); if ((status = usb_reset_configuration (dev)) < 0) { - WARN("reset_configuration failed,status=%d",status); + WARNING("reset_configuration failed,status=%d",status); return status; } @@ -269,7 +269,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter) // Check if the config is sane if ( altsetting->desc.bNumEndpoints != 7 ) { - WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints); + WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints); return -EINVAL; } @@ -279,7 +279,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter) // Use alternative setting 3 on interface 0 to have 2B+D if ((status = usb_set_interface (dev, 0, 3)) < 0) { - WARN("usb_set_interface failed,status=%d",status); + WARNING("usb_set_interface failed,status=%d",status); return status; } @@ -497,7 +497,7 @@ static void usb_in_complete(struct urb *urb) DBG(1,"urb killed status %d", urb->status); return; // Give up default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); break; } } @@ -523,7 +523,7 @@ static void usb_in_complete(struct urb *urb) DBG(4,"count=%d",status); DBG_PACKET(0x400, in->rcvbuf, status); if (!(skb = dev_alloc_skb(status))) { - WARN("receive out of memory\n"); + WARNING("receive out of memory\n"); break; } memcpy(skb_put(skb, status), in->rcvbuf, status); diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1a8de57289eb..37344aaee22f 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -98,16 +98,20 @@ static u32 lg_get_features(struct virtio_device *vdev) return features; } -static void lg_set_features(struct virtio_device *vdev, u32 features) +static void lg_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct lguest_device_desc *desc = to_lgdev(vdev)->desc; /* Second half of bitmap is features we accept. */ u8 *out_features = lg_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -297,7 +301,7 @@ static void lg_del_vq(struct virtqueue *vq) /* The ops structure which hooks everything together. */ static struct virtio_config_ops lguest_config_ops = { .get_features = lg_get_features, - .set_features = lg_set_features, + .finalize_features = lg_finalize_features, .get = lg_get, .set = lg_set, .get_status = lg_get_status, diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9f93c29fed35..1f57a99fd968 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -19,6 +19,14 @@ config MFD_SM501 interface. The device may be connected by PCI or local bus with varying functions enabled. +config MFD_SM501_GPIO + bool "Export GPIO via GPIO layer" + depends on MFD_SM501 && HAVE_GPIO_LIB + ---help--- + This option uses the gpio library layer to export the 64 GPIO + lines on the SM501. The platform data is used to supply the + base number for the first GPIO line to register. + config MFD_ASIC3 bool "Support for Compaq ASIC3" depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM @@ -28,7 +36,7 @@ config MFD_ASIC3 config HTC_EGPIO bool "HTC EGPIO support" - depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM + depends on GENERIC_HARDIRQS && GPIOLIB && ARM help This driver supports the CPLD egpio chip present on several HTC phones. It provides basic support for input @@ -44,7 +52,7 @@ config HTC_PASIC3 config MFD_TC6393XB bool "Support Toshiba TC6393XB" - depends on HAVE_GPIO_LIB + depends on GPIOLIB select MFD_CORE help Support for Toshiba Mobile IO Controller TC6393XB diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c index 8872cc077519..6be43172dc65 100644 --- a/drivers/mfd/htc-egpio.c +++ b/drivers/mfd/htc-egpio.c @@ -318,6 +318,8 @@ static int __init egpio_probe(struct platform_device *pdev) ei->chip[i].dev = &(pdev->dev); chip = &(ei->chip[i].chip); chip->label = "htc-egpio"; + chip->dev = &pdev->dev; + chip->owner = THIS_MODULE; chip->get = egpio_get; chip->set = egpio_set; chip->direction_input = egpio_direction_input; diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c index 633cbba072f0..91b294dcc133 100644 --- a/drivers/mfd/htc-pasic3.c +++ b/drivers/mfd/htc-pasic3.c @@ -238,6 +238,8 @@ static int pasic3_remove(struct platform_device *pdev) return 0; } +MODULE_ALIAS("platform:pasic3"); + static struct platform_driver pasic3_driver = { .driver = { .name = "pasic3", diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 1eab7cffceaa..b5272b5ce3fa 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -242,6 +242,8 @@ static int mcp_sa11x0_resume(struct platform_device *dev) /* * The driver for the SA11x0 MCP port. */ +MODULE_ALIAS("platform:sa11x0-mcp"); + static struct platform_driver mcp_sa11x0_driver = { .probe = mcp_sa11x0_probe, .remove = mcp_sa11x0_remove, diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index d7d88ce053a6..0454be4266c1 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -36,7 +36,7 @@ static int mfd_add_device(struct platform_device *parent, if (ret) goto fail_device; - memzero(res, sizeof(res)); + memset(res, 0, sizeof(res)); for (r = 0; r < cell->num_resources; r++) { res[r].name = cell->resources[r].name; res[r].flags = cell->resources[r].flags; diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 2fe64734d8af..7aebad4c06ff 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -19,6 +19,7 @@ #include <linux/device.h> #include <linux/platform_device.h> #include <linux/pci.h> +#include <linux/i2c-gpio.h> #include <linux/sm501.h> #include <linux/sm501-regs.h> @@ -31,10 +32,37 @@ struct sm501_device { struct platform_device pdev; }; +struct sm501_gpio; + +#ifdef CONFIG_MFD_SM501_GPIO +#include <linux/gpio.h> + +struct sm501_gpio_chip { + struct gpio_chip gpio; + struct sm501_gpio *ourgpio; /* to get back to parent. */ + void __iomem *regbase; +}; + +struct sm501_gpio { + struct sm501_gpio_chip low; + struct sm501_gpio_chip high; + spinlock_t lock; + + unsigned int registered : 1; + void __iomem *regs; + struct resource *regs_res; +}; +#else +struct sm501_gpio { + /* no gpio support, empty definition for sm501_devdata. */ +}; +#endif + struct sm501_devdata { spinlock_t reg_lock; struct mutex clock_lock; struct list_head devices; + struct sm501_gpio gpio; struct device *dev; struct resource *io_res; @@ -42,6 +70,7 @@ struct sm501_devdata { struct resource *regs_claim; struct sm501_platdata *platdata; + unsigned int in_suspend; unsigned long pm_misc; @@ -52,6 +81,7 @@ struct sm501_devdata { unsigned int rev; }; + #define MHZ (1000 * 1000) #ifdef DEBUG @@ -276,58 +306,6 @@ unsigned long sm501_modify_reg(struct device *dev, EXPORT_SYMBOL_GPL(sm501_modify_reg); -unsigned long sm501_gpio_get(struct device *dev, - unsigned long gpio) -{ - struct sm501_devdata *sm = dev_get_drvdata(dev); - unsigned long result; - unsigned long reg; - - reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW; - result = readl(sm->regs + reg); - - result >>= (gpio & 31); - return result & 1UL; -} - -EXPORT_SYMBOL_GPL(sm501_gpio_get); - -void sm501_gpio_set(struct device *dev, - unsigned long gpio, - unsigned int to, - unsigned int dir) -{ - struct sm501_devdata *sm = dev_get_drvdata(dev); - - unsigned long bit = 1 << (gpio & 31); - unsigned long base; - unsigned long save; - unsigned long val; - - base = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW; - base += SM501_GPIO; - - spin_lock_irqsave(&sm->reg_lock, save); - - val = readl(sm->regs + base) & ~bit; - if (to) - val |= bit; - writel(val, sm->regs + base); - - val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit; - if (dir) - val |= bit; - - writel(val, sm->regs + SM501_GPIO_DDR_LOW); - sm501_sync_regs(sm); - - spin_unlock_irqrestore(&sm->reg_lock, save); - -} - -EXPORT_SYMBOL_GPL(sm501_gpio_set); - - /* sm501_unit_power * * alters the power active gate to set specific units on or off @@ -906,6 +884,313 @@ static int sm501_register_display(struct sm501_devdata *sm, return sm501_register_device(sm, pdev); } +#ifdef CONFIG_MFD_SM501_GPIO + +static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc) +{ + return container_of(gc, struct sm501_gpio_chip, gpio); +} + +static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio) +{ + return container_of(gpio, struct sm501_devdata, gpio); +} + +static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset) + +{ + struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip); + unsigned long result; + + result = readl(smgpio->regbase + SM501_GPIO_DATA_LOW); + result >>= offset; + + return result & 1UL; +} + +static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value) + +{ + struct sm501_gpio_chip *smchip = to_sm501_gpio(chip); + struct sm501_gpio *smgpio = smchip->ourgpio; + unsigned long bit = 1 << offset; + void __iomem *regs = smchip->regbase; + unsigned long save; + unsigned long val; + + dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n", + __func__, chip, offset); + + spin_lock_irqsave(&smgpio->lock, save); + + val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit; + if (value) + val |= bit; + writel(val, regs); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + spin_unlock_irqrestore(&smgpio->lock, save); +} + +static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset) +{ + struct sm501_gpio_chip *smchip = to_sm501_gpio(chip); + struct sm501_gpio *smgpio = smchip->ourgpio; + void __iomem *regs = smchip->regbase; + unsigned long bit = 1 << offset; + unsigned long save; + unsigned long ddr; + + dev_info(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n", + __func__, chip, offset); + + spin_lock_irqsave(&smgpio->lock, save); + + ddr = readl(regs + SM501_GPIO_DDR_LOW); + writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + spin_unlock_irqrestore(&smgpio->lock, save); + + return 0; +} + +static int sm501_gpio_output(struct gpio_chip *chip, + unsigned offset, int value) +{ + struct sm501_gpio_chip *smchip = to_sm501_gpio(chip); + struct sm501_gpio *smgpio = smchip->ourgpio; + unsigned long bit = 1 << offset; + void __iomem *regs = smchip->regbase; + unsigned long save; + unsigned long val; + unsigned long ddr; + + dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n", + __func__, chip, offset, value); + + spin_lock_irqsave(&smgpio->lock, save); + + val = readl(regs + SM501_GPIO_DATA_LOW); + if (value) + val |= bit; + else + val &= ~bit; + writel(val, regs); + + ddr = readl(regs + SM501_GPIO_DDR_LOW); + writel(ddr | bit, regs + SM501_GPIO_DDR_LOW); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + writel(val, regs + SM501_GPIO_DATA_LOW); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + spin_unlock_irqrestore(&smgpio->lock, save); + + return 0; +} + +static struct gpio_chip gpio_chip_template = { + .ngpio = 32, + .direction_input = sm501_gpio_input, + .direction_output = sm501_gpio_output, + .set = sm501_gpio_set, + .get = sm501_gpio_get, +}; + +static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm, + struct sm501_gpio *gpio, + struct sm501_gpio_chip *chip) +{ + struct sm501_platdata *pdata = sm->platdata; + struct gpio_chip *gchip = &chip->gpio; + int base = pdata->gpio_base; + + chip->gpio = gpio_chip_template; + + if (chip == &gpio->high) { + if (base > 0) + base += 32; + chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH; + gchip->label = "SM501-HIGH"; + } else { + chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW; + gchip->label = "SM501-LOW"; + } + + gchip->base = base; + chip->ourgpio = gpio; + + return gpiochip_add(gchip); +} + +static int sm501_register_gpio(struct sm501_devdata *sm) +{ + struct sm501_gpio *gpio = &sm->gpio; + resource_size_t iobase = sm->io_res->start + SM501_GPIO; + int ret; + int tmp; + + dev_dbg(sm->dev, "registering gpio block %08llx\n", + (unsigned long long)iobase); + + spin_lock_init(&gpio->lock); + + gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio"); + if (gpio->regs_res == NULL) { + dev_err(sm->dev, "gpio: failed to request region\n"); + return -ENXIO; + } + + gpio->regs = ioremap(iobase, 0x20); + if (gpio->regs == NULL) { + dev_err(sm->dev, "gpio: failed to remap registers\n"); + ret = -ENXIO; + goto err_claimed; + } + + /* Register both our chips. */ + + ret = sm501_gpio_register_chip(sm, gpio, &gpio->low); + if (ret) { + dev_err(sm->dev, "failed to add low chip\n"); + goto err_mapped; + } + + ret = sm501_gpio_register_chip(sm, gpio, &gpio->high); + if (ret) { + dev_err(sm->dev, "failed to add high chip\n"); + goto err_low_chip; + } + + gpio->registered = 1; + + return 0; + + err_low_chip: + tmp = gpiochip_remove(&gpio->low.gpio); + if (tmp) { + dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n"); + return ret; + } + + err_mapped: + iounmap(gpio->regs); + + err_claimed: + release_resource(gpio->regs_res); + kfree(gpio->regs_res); + + return ret; +} + +static void sm501_gpio_remove(struct sm501_devdata *sm) +{ + struct sm501_gpio *gpio = &sm->gpio; + int ret; + + if (!sm->gpio.registered) + return; + + ret = gpiochip_remove(&gpio->low.gpio); + if (ret) + dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n"); + + ret = gpiochip_remove(&gpio->high.gpio); + if (ret) + dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n"); + + iounmap(gpio->regs); + release_resource(gpio->regs_res); + kfree(gpio->regs_res); +} + +static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin) +{ + struct sm501_gpio *gpio = &sm->gpio; + int base = (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base; + + return (pin % 32) + base; +} + +static inline int sm501_gpio_isregistered(struct sm501_devdata *sm) +{ + return sm->gpio.registered; +} +#else +static inline int sm501_register_gpio(struct sm501_devdata *sm) +{ + return 0; +} + +static inline void sm501_gpio_remove(struct sm501_devdata *sm) +{ +} + +static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin) +{ + return -1; +} + +static inline int sm501_gpio_isregistered(struct sm501_devdata *sm) +{ + return 0; +} +#endif + +static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm, + struct sm501_platdata_gpio_i2c *iic) +{ + struct i2c_gpio_platform_data *icd; + struct platform_device *pdev; + + pdev = sm501_create_subdev(sm, "i2c-gpio", 0, + sizeof(struct i2c_gpio_platform_data)); + if (!pdev) + return -ENOMEM; + + icd = pdev->dev.platform_data; + + /* We keep the pin_sda and pin_scl fields relative in case the + * same platform data is passed to >1 SM501. + */ + + icd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda); + icd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl); + icd->timeout = iic->timeout; + icd->udelay = iic->udelay; + + /* note, we can't use either of the pin numbers, as the i2c-gpio + * driver uses the platform.id field to generate the bus number + * to register with the i2c core; The i2c core doesn't have enough + * entries to deal with anything we currently use. + */ + + pdev->id = iic->bus_num; + + dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n", + iic->bus_num, + icd->sda_pin, iic->pin_sda, icd->scl_pin, iic->pin_scl); + + return sm501_register_device(sm, pdev); +} + +static int sm501_register_gpio_i2c(struct sm501_devdata *sm, + struct sm501_platdata *pdata) +{ + struct sm501_platdata_gpio_i2c *iic = pdata->gpio_i2c; + int index; + int ret; + + for (index = 0; index < pdata->gpio_i2c_nr; index++, iic++) { + ret = sm501_register_gpio_i2c_instance(sm, iic); + if (ret < 0) + return ret; + } + + return 0; +} + /* sm501_dbg_regs * * Debug attribute to attach to parent device to show core registers @@ -1013,6 +1298,7 @@ static unsigned int sm501_mem_local[] = { static int sm501_init_dev(struct sm501_devdata *sm) { struct sm501_initdata *idata; + struct sm501_platdata *pdata; resource_size_t mem_avail; unsigned long dramctrl; unsigned long devid; @@ -1051,7 +1337,9 @@ static int sm501_init_dev(struct sm501_devdata *sm) /* check to see if we have some device initialisation */ - idata = sm->platdata ? sm->platdata->init : NULL; + pdata = sm->platdata; + idata = pdata ? pdata->init : NULL; + if (idata) { sm501_init_regs(sm, idata); @@ -1059,6 +1347,15 @@ static int sm501_init_dev(struct sm501_devdata *sm) sm501_register_usbhost(sm, &mem_avail); if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1)) sm501_register_uart(sm, idata->devices); + if (idata->devices & SM501_USE_GPIO) + sm501_register_gpio(sm); + } + + if (pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) { + if (!sm501_gpio_isregistered(sm)) + dev_err(sm->dev, "no gpio available for i2c gpio.\n"); + else + sm501_register_gpio_i2c(sm, pdata); } ret = sm501_check_clocks(sm); @@ -1138,8 +1435,31 @@ static int sm501_plat_probe(struct platform_device *dev) } #ifdef CONFIG_PM + /* power management support */ +static void sm501_set_power(struct sm501_devdata *sm, int on) +{ + struct sm501_platdata *pd = sm->platdata; + + if (pd == NULL) + return; + + if (pd->get_power) { + if (pd->get_power(sm->dev) == on) { + dev_dbg(sm->dev, "is already %d\n", on); + return; + } + } + + if (pd->set_power) { + dev_dbg(sm->dev, "setting power to %d\n", on); + + pd->set_power(sm->dev, on); + sm501_mdelay(sm, 10); + } +} + static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state) { struct sm501_devdata *sm = platform_get_drvdata(pdev); @@ -1148,6 +1468,12 @@ static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state) sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL); sm501_dump_regs(sm); + + if (sm->platdata) { + if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF) + sm501_set_power(sm, 0); + } + return 0; } @@ -1155,6 +1481,8 @@ static int sm501_plat_resume(struct platform_device *pdev) { struct sm501_devdata *sm = platform_get_drvdata(pdev); + sm501_set_power(sm, 1); + sm501_dump_regs(sm); sm501_dump_gate(sm); sm501_dump_clk(sm); @@ -1229,6 +1557,7 @@ static struct sm501_platdata_fb sm501_fb_pdata = { static struct sm501_platdata sm501_pci_platdata = { .init = &sm501_pci_initdata, .fb = &sm501_fb_pdata, + .gpio_base = -1, }; static int sm501_pci_probe(struct pci_dev *dev, @@ -1335,6 +1664,8 @@ static void sm501_dev_remove(struct sm501_devdata *sm) sm501_remove_sub(sm, smdev); device_remove_file(sm->dev, &dev_attr_dbg_regs); + + sm501_gpio_remove(sm); } static void sm501_pci_remove(struct pci_dev *dev) @@ -1378,6 +1709,8 @@ static struct pci_driver sm501_pci_drv = { .remove = sm501_pci_remove, }; +MODULE_ALIAS("platform:sm501"); + static struct platform_driver sm501_plat_drv = { .driver = { .name = "sm501", diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index d5bc288b1b0d..321eb9134635 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -77,11 +77,13 @@ config IBM_ASM for your IBM server. config PHANTOM - tristate "Sensable PHANToM" + tristate "Sensable PHANToM (PCI)" depends on PCI help Say Y here if you want to build a driver for Sensable PHANToM device. + This driver is only for PCI PHANToMs. + If you choose to build module, its name will be phantom. If unsure, say N here. @@ -212,6 +214,18 @@ config TC1100_WMI This is a driver for the WMI extensions (wireless and bluetooth power control) of the HP Compaq TC1100 tablet. +config HP_WMI + tristate "HP WMI extras" + depends on ACPI_WMI + depends on INPUT + depends on RFKILL + help + Say Y here if you want to support WMI-based hotkeys on HP laptops and + to read data from WMI such as docking or ambient light sensor state. + + To compile this driver as a module, choose M here: the module will + be called hp-wmi. + config MSI_LAPTOP tristate "MSI Laptop Extras" depends on X86 @@ -424,6 +438,7 @@ config SGI_XP config HP_ILO tristate "Channel interface driver for HP iLO/iLO2 processor" + depends on PCI default n help The channel interface driver allows applications to communicate diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 688fe76135e0..f5e273420c09 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_ACER_WMI) += acer-wmi.o obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o +obj-$(CONFIG_HP_WMI) += hp-wmi.o obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o obj-$(CONFIG_LKDTM) += lkdtm.o obj-$(CONFIG_TIFM_CORE) += tifm_core.o diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c new file mode 100644 index 000000000000..1dbcbcb323a2 --- /dev/null +++ b/drivers/misc/hp-wmi.c @@ -0,0 +1,494 @@ +/* + * HP WMI hotkeys + * + * Copyright (C) 2008 Red Hat <mjg@redhat.com> + * + * Portions based on wistron_btns.c: + * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz> + * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org> + * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/input.h> +#include <acpi/acpi_drivers.h> +#include <linux/platform_device.h> +#include <linux/acpi.h> +#include <linux/rfkill.h> +#include <linux/string.h> + +MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>"); +MODULE_DESCRIPTION("HP laptop WMI hotkeys driver"); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C"); +MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); + +#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C" +#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4" + +#define HPWMI_DISPLAY_QUERY 0x1 +#define HPWMI_HDDTEMP_QUERY 0x2 +#define HPWMI_ALS_QUERY 0x3 +#define HPWMI_DOCK_QUERY 0x4 +#define HPWMI_WIRELESS_QUERY 0x5 + +static int __init hp_wmi_bios_setup(struct platform_device *device); +static int __exit hp_wmi_bios_remove(struct platform_device *device); + +struct bios_args { + u32 signature; + u32 command; + u32 commandtype; + u32 datasize; + u32 data; +}; + +struct bios_return { + u32 sigpass; + u32 return_code; + u32 value; +}; + +struct key_entry { + char type; /* See KE_* below */ + u8 code; + u16 keycode; +}; + +enum { KE_KEY, KE_SW, KE_END }; + +static struct key_entry hp_wmi_keymap[] = { + {KE_SW, 0x01, SW_DOCK}, + {KE_KEY, 0x02, KEY_BRIGHTNESSUP}, + {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN}, + {KE_KEY, 0x04, KEY_HELP}, + {KE_END, 0} +}; + +static struct input_dev *hp_wmi_input_dev; +static struct platform_device *hp_wmi_platform_dev; + +static struct rfkill *wifi_rfkill; +static struct rfkill *bluetooth_rfkill; +static struct rfkill *wwan_rfkill; + +static struct platform_driver hp_wmi_driver = { + .driver = { + .name = "hp-wmi", + .owner = THIS_MODULE, + }, + .probe = hp_wmi_bios_setup, + .remove = hp_wmi_bios_remove, +}; + +static int hp_wmi_perform_query(int query, int write, int value) +{ + struct bios_return bios_return; + acpi_status status; + union acpi_object *obj; + struct bios_args args = { + .signature = 0x55434553, + .command = write ? 0x2 : 0x1, + .commandtype = query, + .datasize = write ? 0x4 : 0, + .data = value, + }; + struct acpi_buffer input = { sizeof(struct bios_args), &args }; + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + + status = wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output); + + obj = output.pointer; + + if (!obj || obj->type != ACPI_TYPE_BUFFER) + return -EINVAL; + + bios_return = *((struct bios_return *)obj->buffer.pointer); + if (bios_return.return_code > 0) + return bios_return.return_code * -1; + else + return bios_return.value; +} + +static int hp_wmi_display_state(void) +{ + return hp_wmi_perform_query(HPWMI_DISPLAY_QUERY, 0, 0); +} + +static int hp_wmi_hddtemp_state(void) +{ + return hp_wmi_perform_query(HPWMI_HDDTEMP_QUERY, 0, 0); +} + +static int hp_wmi_als_state(void) +{ + return hp_wmi_perform_query(HPWMI_ALS_QUERY, 0, 0); +} + +static int hp_wmi_dock_state(void) +{ + return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0); +} + +static int hp_wmi_wifi_set(void *data, enum rfkill_state state) +{ + if (state) + return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x101); + else + return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x100); +} + +static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state) +{ + if (state) + return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x202); + else + return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x200); +} + +static int hp_wmi_wwan_set(void *data, enum rfkill_state state) +{ + if (state) + return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x404); + else + return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x400); +} + +static int hp_wmi_wifi_state(void) +{ + int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0); + + if (wireless & 0x100) + return 1; + else + return 0; +} + +static int hp_wmi_bluetooth_state(void) +{ + int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0); + + if (wireless & 0x10000) + return 1; + else + return 0; +} + +static int hp_wmi_wwan_state(void) +{ + int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0); + + if (wireless & 0x1000000) + return 1; + else + return 0; +} + +static ssize_t show_display(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int value = hp_wmi_display_state(); + if (value < 0) + return -EINVAL; + return sprintf(buf, "%d\n", value); +} + +static ssize_t show_hddtemp(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int value = hp_wmi_hddtemp_state(); + if (value < 0) + return -EINVAL; + return sprintf(buf, "%d\n", value); +} + +static ssize_t show_als(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int value = hp_wmi_als_state(); + if (value < 0) + return -EINVAL; + return sprintf(buf, "%d\n", value); +} + +static ssize_t show_dock(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int value = hp_wmi_dock_state(); + if (value < 0) + return -EINVAL; + return sprintf(buf, "%d\n", value); +} + +static ssize_t set_als(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 tmp = simple_strtoul(buf, NULL, 10); + hp_wmi_perform_query(HPWMI_ALS_QUERY, 1, tmp); + return count; +} + +static DEVICE_ATTR(display, S_IRUGO, show_display, NULL); +static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL); +static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als); +static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL); + +static struct key_entry *hp_wmi_get_entry_by_scancode(int code) +{ + struct key_entry *key; + + for (key = hp_wmi_keymap; key->type != KE_END; key++) + if (code == key->code) + return key; + + return NULL; +} + +static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode) +{ + struct key_entry *key; + + for (key = hp_wmi_keymap; key->type != KE_END; key++) + if (key->type == KE_KEY && keycode == key->keycode) + return key; + + return NULL; +} + +static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode) +{ + struct key_entry *key = hp_wmi_get_entry_by_scancode(scancode); + + if (key && key->type == KE_KEY) { + *keycode = key->keycode; + return 0; + } + + return -EINVAL; +} + +static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode) +{ + struct key_entry *key; + int old_keycode; + + if (keycode < 0 || keycode > KEY_MAX) + return -EINVAL; + + key = hp_wmi_get_entry_by_scancode(scancode); + if (key && key->type == KE_KEY) { + old_keycode = key->keycode; + key->keycode = keycode; + set_bit(keycode, dev->keybit); + if (!hp_wmi_get_entry_by_keycode(old_keycode)) + clear_bit(old_keycode, dev->keybit); + return 0; + } + + return -EINVAL; +} + +void hp_wmi_notify(u32 value, void *context) +{ + struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL }; + static struct key_entry *key; + union acpi_object *obj; + + wmi_get_event_data(value, &response); + + obj = (union acpi_object *)response.pointer; + + if (obj && obj->type == ACPI_TYPE_BUFFER && obj->buffer.length == 8) { + int eventcode = *((u8 *) obj->buffer.pointer); + key = hp_wmi_get_entry_by_scancode(eventcode); + if (key) { + switch (key->type) { + case KE_KEY: + input_report_key(hp_wmi_input_dev, + key->keycode, 1); + input_sync(hp_wmi_input_dev); + input_report_key(hp_wmi_input_dev, + key->keycode, 0); + input_sync(hp_wmi_input_dev); + break; + case KE_SW: + input_report_switch(hp_wmi_input_dev, + key->keycode, + hp_wmi_dock_state()); + input_sync(hp_wmi_input_dev); + break; + } + } else if (eventcode == 0x5) { + if (wifi_rfkill) + wifi_rfkill->state = hp_wmi_wifi_state(); + if (bluetooth_rfkill) + bluetooth_rfkill->state = + hp_wmi_bluetooth_state(); + if (wwan_rfkill) + wwan_rfkill->state = hp_wmi_wwan_state(); + } else + printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n", + eventcode); + } else + printk(KERN_INFO "HP WMI: Unknown response received\n"); +} + +static int __init hp_wmi_input_setup(void) +{ + struct key_entry *key; + int err; + + hp_wmi_input_dev = input_allocate_device(); + + hp_wmi_input_dev->name = "HP WMI hotkeys"; + hp_wmi_input_dev->phys = "wmi/input0"; + hp_wmi_input_dev->id.bustype = BUS_HOST; + hp_wmi_input_dev->getkeycode = hp_wmi_getkeycode; + hp_wmi_input_dev->setkeycode = hp_wmi_setkeycode; + + for (key = hp_wmi_keymap; key->type != KE_END; key++) { + switch (key->type) { + case KE_KEY: + set_bit(EV_KEY, hp_wmi_input_dev->evbit); + set_bit(key->keycode, hp_wmi_input_dev->keybit); + break; + case KE_SW: + set_bit(EV_SW, hp_wmi_input_dev->evbit); + set_bit(key->keycode, hp_wmi_input_dev->swbit); + break; + } + } + + err = input_register_device(hp_wmi_input_dev); + + if (err) { + input_free_device(hp_wmi_input_dev); + return err; + } + + return 0; +} + +static void cleanup_sysfs(struct platform_device *device) +{ + device_remove_file(&device->dev, &dev_attr_display); + device_remove_file(&device->dev, &dev_attr_hddtemp); + device_remove_file(&device->dev, &dev_attr_als); + device_remove_file(&device->dev, &dev_attr_dock); +} + +static int __init hp_wmi_bios_setup(struct platform_device *device) +{ + int err; + + err = device_create_file(&device->dev, &dev_attr_display); + if (err) + goto add_sysfs_error; + err = device_create_file(&device->dev, &dev_attr_hddtemp); + if (err) + goto add_sysfs_error; + err = device_create_file(&device->dev, &dev_attr_als); + if (err) + goto add_sysfs_error; + err = device_create_file(&device->dev, &dev_attr_dock); + if (err) + goto add_sysfs_error; + + wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN); + wifi_rfkill->name = "hp-wifi"; + wifi_rfkill->state = hp_wmi_wifi_state(); + wifi_rfkill->toggle_radio = hp_wmi_wifi_set; + wifi_rfkill->user_claim_unsupported = 1; + + bluetooth_rfkill = rfkill_allocate(&device->dev, + RFKILL_TYPE_BLUETOOTH); + bluetooth_rfkill->name = "hp-bluetooth"; + bluetooth_rfkill->state = hp_wmi_bluetooth_state(); + bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set; + bluetooth_rfkill->user_claim_unsupported = 1; + + wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX); + wwan_rfkill->name = "hp-wwan"; + wwan_rfkill->state = hp_wmi_wwan_state(); + wwan_rfkill->toggle_radio = hp_wmi_wwan_set; + wwan_rfkill->user_claim_unsupported = 1; + + rfkill_register(wifi_rfkill); + rfkill_register(bluetooth_rfkill); + rfkill_register(wwan_rfkill); + + return 0; +add_sysfs_error: + cleanup_sysfs(device); + return err; +} + +static int __exit hp_wmi_bios_remove(struct platform_device *device) +{ + cleanup_sysfs(device); + + rfkill_unregister(wifi_rfkill); + rfkill_unregister(bluetooth_rfkill); + rfkill_unregister(wwan_rfkill); + + return 0; +} + +static int __init hp_wmi_init(void) +{ + int err; + + if (wmi_has_guid(HPWMI_EVENT_GUID)) { + err = wmi_install_notify_handler(HPWMI_EVENT_GUID, + hp_wmi_notify, NULL); + if (!err) + hp_wmi_input_setup(); + } + + if (wmi_has_guid(HPWMI_BIOS_GUID)) { + err = platform_driver_register(&hp_wmi_driver); + if (err) + return 0; + hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1); + if (!hp_wmi_platform_dev) { + platform_driver_unregister(&hp_wmi_driver); + return 0; + } + platform_device_add(hp_wmi_platform_dev); + } + + return 0; +} + +static void __exit hp_wmi_exit(void) +{ + if (wmi_has_guid(HPWMI_EVENT_GUID)) { + wmi_remove_notify_handler(HPWMI_EVENT_GUID); + input_unregister_device(hp_wmi_input_dev); + } + if (hp_wmi_platform_dev) { + platform_device_del(hp_wmi_platform_dev); + platform_driver_unregister(&hp_wmi_driver); + } +} + +module_init(hp_wmi_init); +module_exit(hp_wmi_exit); diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 4ce3bdc2f959..daf585689ce3 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -563,6 +563,6 @@ module_init(phantom_init); module_exit(phantom_exit); MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>"); -MODULE_DESCRIPTION("Sensable Phantom driver"); +MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)"); MODULE_LICENSE("GPL"); MODULE_VERSION(PHANTOM_VERSION); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 961416ac0616..c7630a228310 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -51,14 +51,13 @@ * @name: MTD device name or number string * @vid_hdr_offs: VID header offset */ -struct mtd_dev_param -{ +struct mtd_dev_param { char name[MTD_PARAM_LEN_MAX]; int vid_hdr_offs; }; /* Numbers of elements set in the @mtd_dev_param array */ -static int mtd_devs = 0; +static int mtd_devs; /* MTD devices specification parameters */ static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; @@ -160,8 +159,7 @@ void ubi_put_device(struct ubi_device *ubi) } /** - * ubi_get_by_major - get UBI device description object by character device - * major number. + * ubi_get_by_major - get UBI device by character device major number. * @major: major number * * This function is similar to 'ubi_get_device()', but it searches the device @@ -355,15 +353,34 @@ static void kill_volumes(struct ubi_device *ubi) } /** + * free_user_volumes - free all user volumes. + * @ubi: UBI device description object + * + * Normally the volumes are freed at the release function of the volume device + * objects. However, on error paths the volumes have to be freed before the + * device objects have been initialized. + */ +static void free_user_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) { + kfree(ubi->volumes[i]->eba_tbl); + kfree(ubi->volumes[i]); + } +} + +/** * uif_init - initialize user interfaces for an UBI device. * @ubi: UBI device description object * * This function returns zero in case of success and a negative error code in - * case of failure. + * case of failure. Note, this function destroys all volumes if it failes. */ static int uif_init(struct ubi_device *ubi) { - int i, err; + int i, err, do_free = 0; dev_t dev; sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); @@ -384,7 +401,7 @@ static int uif_init(struct ubi_device *ubi) ubi_assert(MINOR(dev) == 0); cdev_init(&ubi->cdev, &ubi_cdev_operations); - dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev)); + dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev)); ubi->cdev.owner = THIS_MODULE; err = cdev_add(&ubi->cdev, dev, 1); @@ -410,10 +427,13 @@ static int uif_init(struct ubi_device *ubi) out_volumes: kill_volumes(ubi); + do_free = 0; out_sysfs: ubi_sysfs_close(ubi); cdev_del(&ubi->cdev); out_unreg: + if (do_free) + free_user_volumes(ubi); unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err); return err; @@ -422,6 +442,10 @@ out_unreg: /** * uif_close - close user interfaces for an UBI device. * @ubi: UBI device description object + * + * Note, since this function un-registers UBI volume device objects (@vol->dev), + * the memory allocated voe the volumes is freed as well (in the release + * function). */ static void uif_close(struct ubi_device *ubi) { @@ -432,6 +456,21 @@ static void uif_close(struct ubi_device *ubi) } /** + * free_internal_volumes - free internal volumes. + * @ubi: UBI device description object + */ +static void free_internal_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = ubi->vtbl_slots; + i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + kfree(ubi->volumes[i]->eba_tbl); + kfree(ubi->volumes[i]); + } +} + +/** * attach_by_scanning - attach an MTD device using scanning method. * @ubi: UBI device descriptor * @@ -475,6 +514,7 @@ static int attach_by_scanning(struct ubi_device *ubi) out_wl: ubi_wl_close(ubi); out_vtbl: + free_internal_volumes(ubi); vfree(ubi->vtbl); out_si: ubi_scan_destroy_si(si); @@ -482,7 +522,7 @@ out_si: } /** - * io_init - initialize I/O unit for a given UBI device. + * io_init - initialize I/O sub-system for a given UBI device. * @ubi: UBI device description object * * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are @@ -530,7 +570,11 @@ static int io_init(struct ubi_device *ubi) ubi->min_io_size = ubi->mtd->writesize; ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; - /* Make sure minimal I/O unit is power of 2 */ + /* + * Make sure minimal I/O unit is power of 2. Note, there is no + * fundamental reason for this assumption. It is just an optimization + * which allows us to avoid costly division operations. + */ if (!is_power_of_2(ubi->min_io_size)) { ubi_err("min. I/O unit (%d) is not power of 2", ubi->min_io_size); @@ -581,7 +625,7 @@ static int io_init(struct ubi_device *ubi) if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || - ubi->leb_start % ubi->min_io_size) { + ubi->leb_start & (ubi->min_io_size - 1)) { ubi_err("bad VID header (%d) or data offsets (%d)", ubi->vid_hdr_offset, ubi->leb_start); return -EINVAL; @@ -646,7 +690,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id) /* * Clear the auto-resize flag in the volume in-memory copy of the - * volume table, and 'ubi_resize_volume()' will propogate this change + * volume table, and 'ubi_resize_volume()' will propagate this change * to the flash. */ ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; @@ -655,7 +699,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id) struct ubi_vtbl_record vtbl_rec; /* - * No avalilable PEBs to re-size the volume, clear the flag on + * No available PEBs to re-size the volume, clear the flag on * flash and exit. */ memcpy(&vtbl_rec, &ubi->vtbl[vol_id], @@ -682,13 +726,13 @@ static int autoresize(struct ubi_device *ubi, int vol_id) /** * ubi_attach_mtd_dev - attach an MTD device. - * @mtd_dev: MTD device description object + * @mtd: MTD device description object * @ubi_num: number to assign to the new UBI device * @vid_hdr_offset: VID header offset * * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in - * which case this function finds a vacant device nubert and assings it + * which case this function finds a vacant device number and assigns it * automatically. Returns the new UBI device number in case of success and a * negative error code in case of failure. * @@ -698,7 +742,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id) int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) { struct ubi_device *ubi; - int i, err; + int i, err, do_free = 1; /* * Check if we already have the same MTD device attached. @@ -735,7 +779,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (!ubi_devices[ubi_num]) break; if (ubi_num == UBI_MAX_DEVICES) { - dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES); + dbg_err("only %d UBI devices may be created", + UBI_MAX_DEVICES); return -ENFILE; } } else { @@ -760,6 +805,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) mutex_init(&ubi->buf_mutex); mutex_init(&ubi->ckvol_mutex); + mutex_init(&ubi->mult_mutex); mutex_init(&ubi->volumes_mutex); spin_lock_init(&ubi->volumes_lock); @@ -798,7 +844,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) err = uif_init(ubi); if (err) - goto out_detach; + goto out_nofree; ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); if (IS_ERR(ubi->bgt_thread)) { @@ -824,20 +870,22 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ubi->beb_rsvd_pebs); ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); - /* Enable the background thread */ - if (!DBG_DISABLE_BGT) { + if (!DBG_DISABLE_BGT) ubi->thread_enabled = 1; - wake_up_process(ubi->bgt_thread); - } + wake_up_process(ubi->bgt_thread); ubi_devices[ubi_num] = ubi; return ubi_num; out_uif: uif_close(ubi); +out_nofree: + do_free = 0; out_detach: - ubi_eba_close(ubi); ubi_wl_close(ubi); + if (do_free) + free_user_volumes(ubi); + free_internal_volumes(ubi); vfree(ubi->vtbl); out_free: vfree(ubi->peb_buf1); @@ -899,8 +947,8 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) kthread_stop(ubi->bgt_thread); uif_close(ubi); - ubi_eba_close(ubi); ubi_wl_close(ubi); + free_internal_volumes(ubi); vfree(ubi->vtbl); put_mtd_device(ubi->mtd); vfree(ubi->peb_buf1); @@ -1044,8 +1092,7 @@ static void __exit ubi_exit(void) module_exit(ubi_exit); /** - * bytes_str_to_int - convert a string representing number of bytes to an - * integer. + * bytes_str_to_int - convert a number of bytes string into an integer. * @str: the string to convert * * This function returns positive resulting integer in case of success and a diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 89193ba9451e..03c759b4eeb5 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -39,9 +39,9 @@ #include <linux/stat.h> #include <linux/ioctl.h> #include <linux/capability.h> +#include <linux/uaccess.h> #include <linux/smp_lock.h> #include <mtd/ubi-user.h> -#include <asm/uaccess.h> #include <asm/div64.h> #include "ubi.h" @@ -116,7 +116,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file) else mode = UBI_READONLY; - dbg_msg("open volume %d, mode %d", vol_id, mode); + dbg_gen("open volume %d, mode %d", vol_id, mode); desc = ubi_open_volume(ubi_num, vol_id, mode); unlock_kernel(); @@ -132,7 +132,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file) struct ubi_volume_desc *desc = file->private_data; struct ubi_volume *vol = desc->vol; - dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode); + dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode); if (vol->updating) { ubi_warn("update of volume %d not finished, volume is damaged", @@ -141,7 +141,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file) vol->updating = 0; vfree(vol->upd_buf); } else if (vol->changing_leb) { - dbg_msg("only %lld of %lld bytes received for atomic LEB change" + dbg_gen("only %lld of %lld bytes received for atomic LEB change" " for volume %d:%d, cancel", vol->upd_received, vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id); vol->changing_leb = 0; @@ -183,7 +183,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) return -EINVAL; } - dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld", + dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld", vol->vol_id, offset, origin, new_offset); file->f_pos = new_offset; @@ -201,7 +201,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, void *tbuf; uint64_t tmp; - dbg_msg("read %zd bytes from offset %lld of volume %d", + dbg_gen("read %zd bytes from offset %lld of volume %d", count, *offp, vol->vol_id); if (vol->updating) { @@ -216,7 +216,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, return 0; if (vol->corrupted) - dbg_msg("read from corrupted volume %d", vol->vol_id); + dbg_gen("read from corrupted volume %d", vol->vol_id); if (*offp + count > vol->used_bytes) count_save = count = vol->used_bytes - *offp; @@ -285,7 +285,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, char *tbuf; uint64_t tmp; - dbg_msg("requested: write %zd bytes to offset %lld of volume %u", + dbg_gen("requested: write %zd bytes to offset %lld of volume %u", count, *offp, vol->vol_id); if (vol->vol_type == UBI_STATIC_VOLUME) @@ -295,7 +295,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, off = do_div(tmp, vol->usable_leb_size); lnum = tmp; - if (off % ubi->min_io_size) { + if (off & (ubi->min_io_size - 1)) { dbg_err("unaligned position"); return -EINVAL; } @@ -304,7 +304,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, count_save = count = vol->used_bytes - *offp; /* We can write only in fractions of the minimum I/O unit */ - if (count % ubi->min_io_size) { + if (count & (ubi->min_io_size - 1)) { dbg_err("unaligned write length"); return -EINVAL; } @@ -352,7 +352,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, } #else -#define vol_cdev_direct_write(file, buf, count, offp) -EPERM +#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM) #endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */ static ssize_t vol_cdev_write(struct file *file, const char __user *buf, @@ -437,7 +437,8 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file, break; } - rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad); + rsvd_bytes = (long long)vol->reserved_pebs * + ubi->leb_size-vol->data_pad; if (bytes < 0 || bytes > rsvd_bytes) { err = -EINVAL; break; @@ -513,7 +514,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file, break; } - dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); + dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); err = ubi_eba_unmap_leb(ubi, vol, lnum); if (err) break; @@ -564,7 +565,7 @@ static int verify_mkvol_req(const struct ubi_device *ubi, if (req->alignment > ubi->leb_size) goto bad; - n = req->alignment % ubi->min_io_size; + n = req->alignment & (ubi->min_io_size - 1); if (req->alignment != 1 && n) goto bad; @@ -573,6 +574,10 @@ static int verify_mkvol_req(const struct ubi_device *ubi, goto bad; } + n = strnlen(req->name, req->name_len + 1); + if (n != req->name_len) + goto bad; + return 0; bad: @@ -600,6 +605,166 @@ static int verify_rsvol_req(const struct ubi_device *ubi, return 0; } +/** + * rename_volumes - rename UBI volumes. + * @ubi: UBI device description object + * @req: volumes re-name request + * + * This is a helper function for the volume re-name IOCTL which validates the + * the request, opens the volume and calls corresponding volumes management + * function. Returns zero in case of success and a negative error code in case + * of failure. + */ +static int rename_volumes(struct ubi_device *ubi, + struct ubi_rnvol_req *req) +{ + int i, n, err; + struct list_head rename_list; + struct ubi_rename_entry *re, *re1; + + if (req->count < 0 || req->count > UBI_MAX_RNVOL) + return -EINVAL; + + if (req->count == 0) + return 0; + + /* Validate volume IDs and names in the request */ + for (i = 0; i < req->count; i++) { + if (req->ents[i].vol_id < 0 || + req->ents[i].vol_id >= ubi->vtbl_slots) + return -EINVAL; + if (req->ents[i].name_len < 0) + return -EINVAL; + if (req->ents[i].name_len > UBI_VOL_NAME_MAX) + return -ENAMETOOLONG; + req->ents[i].name[req->ents[i].name_len] = '\0'; + n = strlen(req->ents[i].name); + if (n != req->ents[i].name_len) + err = -EINVAL; + } + + /* Make sure volume IDs and names are unique */ + for (i = 0; i < req->count - 1; i++) { + for (n = i + 1; n < req->count; n++) { + if (req->ents[i].vol_id == req->ents[n].vol_id) { + dbg_err("duplicated volume id %d", + req->ents[i].vol_id); + return -EINVAL; + } + if (!strcmp(req->ents[i].name, req->ents[n].name)) { + dbg_err("duplicated volume name \"%s\"", + req->ents[i].name); + return -EINVAL; + } + } + } + + /* Create the re-name list */ + INIT_LIST_HEAD(&rename_list); + for (i = 0; i < req->count; i++) { + int vol_id = req->ents[i].vol_id; + int name_len = req->ents[i].name_len; + const char *name = req->ents[i].name; + + re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); + if (!re) { + err = -ENOMEM; + goto out_free; + } + + re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); + if (IS_ERR(re->desc)) { + err = PTR_ERR(re->desc); + dbg_err("cannot open volume %d, error %d", vol_id, err); + kfree(re); + goto out_free; + } + + /* Skip this re-naming if the name does not really change */ + if (re->desc->vol->name_len == name_len && + !memcmp(re->desc->vol->name, name, name_len)) { + ubi_close_volume(re->desc); + kfree(re); + continue; + } + + re->new_name_len = name_len; + memcpy(re->new_name, name, name_len); + list_add_tail(&re->list, &rename_list); + dbg_msg("will rename volume %d from \"%s\" to \"%s\"", + vol_id, re->desc->vol->name, name); + } + + if (list_empty(&rename_list)) + return 0; + + /* Find out the volumes which have to be removed */ + list_for_each_entry(re, &rename_list, list) { + struct ubi_volume_desc *desc; + int no_remove_needed = 0; + + /* + * Volume @re->vol_id is going to be re-named to + * @re->new_name, while its current name is @name. If a volume + * with name @re->new_name currently exists, it has to be + * removed, unless it is also re-named in the request (@req). + */ + list_for_each_entry(re1, &rename_list, list) { + if (re->new_name_len == re1->desc->vol->name_len && + !memcmp(re->new_name, re1->desc->vol->name, + re1->desc->vol->name_len)) { + no_remove_needed = 1; + break; + } + } + + if (no_remove_needed) + continue; + + /* + * It seems we need to remove volume with name @re->new_name, + * if it exists. + */ + desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + if (err == -ENODEV) + /* Re-naming into a non-existing volume name */ + continue; + + /* The volume exists but busy, or an error occurred */ + dbg_err("cannot open volume \"%s\", error %d", + re->new_name, err); + goto out_free; + } + + re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); + if (!re) { + err = -ENOMEM; + ubi_close_volume(desc); + goto out_free; + } + + re->remove = 1; + re->desc = desc; + list_add(&re->list, &rename_list); + dbg_msg("will remove volume %d, name \"%s\"", + re->desc->vol->vol_id, re->desc->vol->name); + } + + mutex_lock(&ubi->volumes_mutex); + err = ubi_rename_volumes(ubi, &rename_list); + mutex_unlock(&ubi->volumes_mutex); + +out_free: + list_for_each_entry_safe(re, re1, &rename_list, list) { + ubi_close_volume(re->desc); + list_del(&re->list); + kfree(re); + } + return err; +} + static int ubi_cdev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -621,19 +786,18 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, { struct ubi_mkvol_req req; - dbg_msg("create volume"); + dbg_gen("create volume"); err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req)); if (err) { err = -EFAULT; break; } + req.name[req.name_len] = '\0'; err = verify_mkvol_req(ubi, &req); if (err) break; - req.name[req.name_len] = '\0'; - mutex_lock(&ubi->volumes_mutex); err = ubi_create_volume(ubi, &req); mutex_unlock(&ubi->volumes_mutex); @@ -652,7 +816,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, { int vol_id; - dbg_msg("remove volume"); + dbg_gen("remove volume"); err = get_user(vol_id, (__user int32_t *)argp); if (err) { err = -EFAULT; @@ -666,7 +830,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, } mutex_lock(&ubi->volumes_mutex); - err = ubi_remove_volume(desc); + err = ubi_remove_volume(desc, 0); mutex_unlock(&ubi->volumes_mutex); /* @@ -685,7 +849,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, uint64_t tmp; struct ubi_rsvol_req req; - dbg_msg("re-size volume"); + dbg_gen("re-size volume"); err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req)); if (err) { err = -EFAULT; @@ -713,6 +877,32 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, break; } + /* Re-name volumes command */ + case UBI_IOCRNVOL: + { + struct ubi_rnvol_req *req; + + dbg_msg("re-name volumes"); + req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL); + if (!req) { + err = -ENOMEM; + break; + }; + + err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req)); + if (err) { + err = -EFAULT; + kfree(req); + break; + } + + mutex_lock(&ubi->mult_mutex); + err = rename_volumes(ubi, req); + mutex_unlock(&ubi->mult_mutex); + kfree(req); + break; + } + default: err = -ENOTTY; break; @@ -738,7 +928,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file, struct ubi_attach_req req; struct mtd_info *mtd; - dbg_msg("attach MTD device"); + dbg_gen("attach MTD device"); err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req)); if (err) { err = -EFAULT; @@ -778,7 +968,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file, { int ubi_num; - dbg_msg("dettach MTD device"); + dbg_gen("dettach MTD device"); err = get_user(ubi_num, (__user int32_t *)argp); if (err) { err = -EFAULT; diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 56956ec2845f..c0ed60e8ade9 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -24,7 +24,7 @@ * changes. */ -#ifdef CONFIG_MTD_UBI_DEBUG_MSG +#ifdef CONFIG_MTD_UBI_DEBUG #include "ubi.h" @@ -34,14 +34,19 @@ */ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { - dbg_msg("erase counter header dump:"); - dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); - dbg_msg("version %d", (int)ec_hdr->version); - dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec)); - dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset)); - dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset)); - dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc)); - dbg_msg("erase counter header hexdump:"); + printk(KERN_DEBUG "Erase counter header dump:\n"); + printk(KERN_DEBUG "\tmagic %#08x\n", + be32_to_cpu(ec_hdr->magic)); + printk(KERN_DEBUG "\tversion %d\n", (int)ec_hdr->version); + printk(KERN_DEBUG "\tec %llu\n", + (long long)be64_to_cpu(ec_hdr->ec)); + printk(KERN_DEBUG "\tvid_hdr_offset %d\n", + be32_to_cpu(ec_hdr->vid_hdr_offset)); + printk(KERN_DEBUG "\tdata_offset %d\n", + be32_to_cpu(ec_hdr->data_offset)); + printk(KERN_DEBUG "\thdr_crc %#08x\n", + be32_to_cpu(ec_hdr->hdr_crc)); + printk(KERN_DEBUG "erase counter header hexdump:\n"); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ec_hdr, UBI_EC_HDR_SIZE, 1); } @@ -52,22 +57,23 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) */ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { - dbg_msg("volume identifier header dump:"); - dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); - dbg_msg("version %d", (int)vid_hdr->version); - dbg_msg("vol_type %d", (int)vid_hdr->vol_type); - dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); - dbg_msg("compat %d", (int)vid_hdr->compat); - dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); - dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); - dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); - dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); - dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); - dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad)); - dbg_msg("sqnum %llu", + printk(KERN_DEBUG "Volume identifier header dump:\n"); + printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); + printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); + printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); + printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); + printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); + printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); + printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); + printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); + printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); + printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + printk(KERN_DEBUG "\tsqnum %llu\n", (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); - dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); - dbg_msg("volume identifier header hexdump:"); + printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); + printk(KERN_DEBUG "Volume identifier header hexdump:\n"); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + vid_hdr, UBI_VID_HDR_SIZE, 1); } /** @@ -76,27 +82,27 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) */ void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) { - dbg_msg("volume information dump:"); - dbg_msg("vol_id %d", vol->vol_id); - dbg_msg("reserved_pebs %d", vol->reserved_pebs); - dbg_msg("alignment %d", vol->alignment); - dbg_msg("data_pad %d", vol->data_pad); - dbg_msg("vol_type %d", vol->vol_type); - dbg_msg("name_len %d", vol->name_len); - dbg_msg("usable_leb_size %d", vol->usable_leb_size); - dbg_msg("used_ebs %d", vol->used_ebs); - dbg_msg("used_bytes %lld", vol->used_bytes); - dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); - dbg_msg("corrupted %d", vol->corrupted); - dbg_msg("upd_marker %d", vol->upd_marker); + printk(KERN_DEBUG "Volume information dump:\n"); + printk(KERN_DEBUG "\tvol_id %d\n", vol->vol_id); + printk(KERN_DEBUG "\treserved_pebs %d\n", vol->reserved_pebs); + printk(KERN_DEBUG "\talignment %d\n", vol->alignment); + printk(KERN_DEBUG "\tdata_pad %d\n", vol->data_pad); + printk(KERN_DEBUG "\tvol_type %d\n", vol->vol_type); + printk(KERN_DEBUG "\tname_len %d\n", vol->name_len); + printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size); + printk(KERN_DEBUG "\tused_ebs %d\n", vol->used_ebs); + printk(KERN_DEBUG "\tused_bytes %lld\n", vol->used_bytes); + printk(KERN_DEBUG "\tlast_eb_bytes %d\n", vol->last_eb_bytes); + printk(KERN_DEBUG "\tcorrupted %d\n", vol->corrupted); + printk(KERN_DEBUG "\tupd_marker %d\n", vol->upd_marker); if (vol->name_len <= UBI_VOL_NAME_MAX && strnlen(vol->name, vol->name_len + 1) == vol->name_len) { - dbg_msg("name %s", vol->name); + printk(KERN_DEBUG "\tname %s\n", vol->name); } else { - dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", - vol->name[0], vol->name[1], vol->name[2], - vol->name[3], vol->name[4]); + printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n", + vol->name[0], vol->name[1], vol->name[2], + vol->name[3], vol->name[4]); } } @@ -109,28 +115,29 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { int name_len = be16_to_cpu(r->name_len); - dbg_msg("volume table record %d dump:", idx); - dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); - dbg_msg("alignment %d", be32_to_cpu(r->alignment)); - dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); - dbg_msg("vol_type %d", (int)r->vol_type); - dbg_msg("upd_marker %d", (int)r->upd_marker); - dbg_msg("name_len %d", name_len); + printk(KERN_DEBUG "Volume table record %d dump:\n", idx); + printk(KERN_DEBUG "\treserved_pebs %d\n", + be32_to_cpu(r->reserved_pebs)); + printk(KERN_DEBUG "\talignment %d\n", be32_to_cpu(r->alignment)); + printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(r->data_pad)); + printk(KERN_DEBUG "\tvol_type %d\n", (int)r->vol_type); + printk(KERN_DEBUG "\tupd_marker %d\n", (int)r->upd_marker); + printk(KERN_DEBUG "\tname_len %d\n", name_len); if (r->name[0] == '\0') { - dbg_msg("name NULL"); + printk(KERN_DEBUG "\tname NULL\n"); return; } if (name_len <= UBI_VOL_NAME_MAX && strnlen(&r->name[0], name_len + 1) == name_len) { - dbg_msg("name %s", &r->name[0]); + printk(KERN_DEBUG "\tname %s\n", &r->name[0]); } else { - dbg_msg("1st 5 characters of the name: %c%c%c%c%c", + printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n", r->name[0], r->name[1], r->name[2], r->name[3], r->name[4]); } - dbg_msg("crc %#08x", be32_to_cpu(r->crc)); + printk(KERN_DEBUG "\tcrc %#08x\n", be32_to_cpu(r->crc)); } /** @@ -139,15 +146,15 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) */ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) { - dbg_msg("volume scanning information dump:"); - dbg_msg("vol_id %d", sv->vol_id); - dbg_msg("highest_lnum %d", sv->highest_lnum); - dbg_msg("leb_count %d", sv->leb_count); - dbg_msg("compat %d", sv->compat); - dbg_msg("vol_type %d", sv->vol_type); - dbg_msg("used_ebs %d", sv->used_ebs); - dbg_msg("last_data_size %d", sv->last_data_size); - dbg_msg("data_pad %d", sv->data_pad); + printk(KERN_DEBUG "Volume scanning information dump:\n"); + printk(KERN_DEBUG "\tvol_id %d\n", sv->vol_id); + printk(KERN_DEBUG "\thighest_lnum %d\n", sv->highest_lnum); + printk(KERN_DEBUG "\tleb_count %d\n", sv->leb_count); + printk(KERN_DEBUG "\tcompat %d\n", sv->compat); + printk(KERN_DEBUG "\tvol_type %d\n", sv->vol_type); + printk(KERN_DEBUG "\tused_ebs %d\n", sv->used_ebs); + printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size); + printk(KERN_DEBUG "\tdata_pad %d\n", sv->data_pad); } /** @@ -157,14 +164,13 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) */ void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) { - dbg_msg("eraseblock scanning information dump:"); - dbg_msg("ec %d", seb->ec); - dbg_msg("pnum %d", seb->pnum); + printk(KERN_DEBUG "eraseblock scanning information dump:\n"); + printk(KERN_DEBUG "\tec %d\n", seb->ec); + printk(KERN_DEBUG "\tpnum %d\n", seb->pnum); if (type == 0) { - dbg_msg("lnum %d", seb->lnum); - dbg_msg("scrub %d", seb->scrub); - dbg_msg("sqnum %llu", seb->sqnum); - dbg_msg("leb_ver %u", seb->leb_ver); + printk(KERN_DEBUG "\tlnum %d\n", seb->lnum); + printk(KERN_DEBUG "\tscrub %d\n", seb->scrub); + printk(KERN_DEBUG "\tsqnum %llu\n", seb->sqnum); } } @@ -176,16 +182,16 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) { char nm[17]; - dbg_msg("volume creation request dump:"); - dbg_msg("vol_id %d", req->vol_id); - dbg_msg("alignment %d", req->alignment); - dbg_msg("bytes %lld", (long long)req->bytes); - dbg_msg("vol_type %d", req->vol_type); - dbg_msg("name_len %d", req->name_len); + printk(KERN_DEBUG "Volume creation request dump:\n"); + printk(KERN_DEBUG "\tvol_id %d\n", req->vol_id); + printk(KERN_DEBUG "\talignment %d\n", req->alignment); + printk(KERN_DEBUG "\tbytes %lld\n", (long long)req->bytes); + printk(KERN_DEBUG "\tvol_type %d\n", req->vol_type); + printk(KERN_DEBUG "\tname_len %d\n", req->name_len); memcpy(nm, req->name, 16); nm[16] = 0; - dbg_msg("the 1st 16 characters of the name: %s", nm); + printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm); } -#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ +#endif /* CONFIG_MTD_UBI_DEBUG */ diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 8ea99d8c9e1f..78e914d23ece 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -24,21 +24,16 @@ #ifdef CONFIG_MTD_UBI_DEBUG #include <linux/random.h> -#define ubi_assert(expr) BUG_ON(!(expr)) #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) -#else -#define ubi_assert(expr) ({}) -#define dbg_err(fmt, ...) ({}) -#endif -#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT -#define DBG_DISABLE_BGT 1 -#else -#define DBG_DISABLE_BGT 0 -#endif +#define ubi_assert(expr) do { \ + if (unlikely(!(expr))) { \ + printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + ubi_dbg_dump_stack(); \ + } \ +} while (0) -#ifdef CONFIG_MTD_UBI_DEBUG_MSG -/* Generic debugging message */ #define dbg_msg(fmt, ...) \ printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \ current->pid, __func__, ##__VA_ARGS__) @@ -61,36 +56,29 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); +#ifdef CONFIG_MTD_UBI_DEBUG_MSG +/* General debugging messages */ +#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) #else - -#define dbg_msg(fmt, ...) ({}) -#define ubi_dbg_dump_stack() ({}) -#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) -#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) -#define ubi_dbg_dump_vol_info(vol) ({}) -#define ubi_dbg_dump_vtbl_record(r, idx) ({}) -#define ubi_dbg_dump_sv(sv) ({}) -#define ubi_dbg_dump_seb(seb, type) ({}) -#define ubi_dbg_dump_mkvol_req(req) ({}) - -#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ +#define dbg_gen(fmt, ...) ({}) +#endif #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA -/* Messages from the eraseblock association unit */ +/* Messages from the eraseblock association sub-system */ #define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) #else #define dbg_eba(fmt, ...) ({}) #endif #ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL -/* Messages from the wear-leveling unit */ +/* Messages from the wear-leveling sub-system */ #define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) #else #define dbg_wl(fmt, ...) ({}) #endif #ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO -/* Messages from the input/output unit */ +/* Messages from the input/output sub-system */ #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) #else #define dbg_io(fmt, ...) ({}) @@ -105,6 +93,12 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); #define UBI_IO_DEBUG 0 #endif +#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT +#define DBG_DISABLE_BGT 1 +#else +#define DBG_DISABLE_BGT 0 +#endif + #ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS /** * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. @@ -149,4 +143,30 @@ static inline int ubi_dbg_is_erase_failure(void) #define ubi_dbg_is_erase_failure() 0 #endif +#else + +#define ubi_assert(expr) ({}) +#define dbg_err(fmt, ...) ({}) +#define dbg_msg(fmt, ...) ({}) +#define dbg_gen(fmt, ...) ({}) +#define dbg_eba(fmt, ...) ({}) +#define dbg_wl(fmt, ...) ({}) +#define dbg_io(fmt, ...) ({}) +#define dbg_bld(fmt, ...) ({}) +#define ubi_dbg_dump_stack() ({}) +#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) +#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) +#define ubi_dbg_dump_vol_info(vol) ({}) +#define ubi_dbg_dump_vtbl_record(r, idx) ({}) +#define ubi_dbg_dump_sv(sv) ({}) +#define ubi_dbg_dump_seb(seb, type) ({}) +#define ubi_dbg_dump_mkvol_req(req) ({}) + +#define UBI_IO_DEBUG 0 +#define DBG_DISABLE_BGT 0 +#define ubi_dbg_is_bitflip() 0 +#define ubi_dbg_is_write_failure() 0 +#define ubi_dbg_is_erase_failure() 0 + +#endif /* !CONFIG_MTD_UBI_DEBUG */ #endif /* !__UBI_DEBUG_H__ */ diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 7ce91ca742b1..e04bcf1dff87 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -19,20 +19,20 @@ */ /* - * The UBI Eraseblock Association (EBA) unit. + * The UBI Eraseblock Association (EBA) sub-system. * - * This unit is responsible for I/O to/from logical eraseblock. + * This sub-system is responsible for I/O to/from logical eraseblock. * * Although in this implementation the EBA table is fully kept and managed in * RAM, which assumes poor scalability, it might be (partially) maintained on * flash in future implementations. * - * The EBA unit implements per-logical eraseblock locking. Before accessing a - * logical eraseblock it is locked for reading or writing. The per-logical - * eraseblock locking is implemented by means of the lock tree. The lock tree - * is an RB-tree which refers all the currently locked logical eraseblocks. The - * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by - * (@vol_id, @lnum) pairs. + * The EBA sub-system implements per-logical eraseblock locking. Before + * accessing a logical eraseblock it is locked for reading or writing. The + * per-logical eraseblock locking is implemented by means of the lock tree. The + * lock tree is an RB-tree which refers all the currently locked logical + * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects. + * They are indexed by (@vol_id, @lnum) pairs. * * EBA also maintains the global sequence counter which is incremented each * time a logical eraseblock is mapped to a physical eraseblock and it is @@ -189,9 +189,7 @@ static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, le->users += 1; spin_unlock(&ubi->ltree_lock); - if (le_free) - kfree(le_free); - + kfree(le_free); return le; } @@ -223,22 +221,18 @@ static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) */ static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) { - int free = 0; struct ubi_ltree_entry *le; spin_lock(&ubi->ltree_lock); le = ltree_lookup(ubi, vol_id, lnum); le->users -= 1; ubi_assert(le->users >= 0); + up_read(&le->mutex); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); - free = 1; + kfree(le); } spin_unlock(&ubi->ltree_lock); - - up_read(&le->mutex); - if (free) - kfree(le); } /** @@ -274,7 +268,6 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) */ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) { - int free; struct ubi_ltree_entry *le; le = ltree_add_entry(ubi, vol_id, lnum); @@ -289,12 +282,9 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) ubi_assert(le->users >= 0); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); - free = 1; - } else - free = 0; - spin_unlock(&ubi->ltree_lock); - if (free) kfree(le); + } + spin_unlock(&ubi->ltree_lock); return 1; } @@ -307,23 +297,18 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) */ static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) { - int free; struct ubi_ltree_entry *le; spin_lock(&ubi->ltree_lock); le = ltree_lookup(ubi, vol_id, lnum); le->users -= 1; ubi_assert(le->users >= 0); + up_write(&le->mutex); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); - free = 1; - } else - free = 0; - spin_unlock(&ubi->ltree_lock); - - up_write(&le->mutex); - if (free) kfree(le); + } + spin_unlock(&ubi->ltree_lock); } /** @@ -516,9 +501,8 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, struct ubi_vid_hdr *vid_hdr; vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); - if (!vid_hdr) { + if (!vid_hdr) return -ENOMEM; - } mutex_lock(&ubi->buf_mutex); @@ -752,7 +736,7 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, /* If this is the last LEB @len may be unaligned */ len = ALIGN(data_size, ubi->min_io_size); else - ubi_assert(len % ubi->min_io_size == 0); + ubi_assert(!(len & (ubi->min_io_size - 1))); vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); if (!vid_hdr) @@ -919,7 +903,7 @@ retry: } if (vol->eba_tbl[lnum] >= 0) { - err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); + err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0); if (err) goto out_leb_unlock; } @@ -1141,7 +1125,7 @@ out_unlock_leb: } /** - * ubi_eba_init_scan - initialize the EBA unit using scanning information. + * ubi_eba_init_scan - initialize the EBA sub-system using scanning information. * @ubi: UBI device description object * @si: scanning information * @@ -1156,7 +1140,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) struct ubi_scan_leb *seb; struct rb_node *rb; - dbg_eba("initialize EBA unit"); + dbg_eba("initialize EBA sub-system"); spin_lock_init(&ubi->ltree_lock); mutex_init(&ubi->alc_mutex); @@ -1222,7 +1206,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ubi->rsvd_pebs += ubi->beb_rsvd_pebs; } - dbg_eba("EBA unit is initialized"); + dbg_eba("EBA sub-system is initialized"); return 0; out_free: @@ -1233,20 +1217,3 @@ out_free: } return err; } - -/** - * ubi_eba_close - close EBA unit. - * @ubi: UBI device description object - */ -void ubi_eba_close(const struct ubi_device *ubi) -{ - int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; - - dbg_eba("close EBA unit"); - - for (i = 0; i < num_volumes; i++) { - if (!ubi->volumes[i]) - continue; - kfree(ubi->volumes[i]->eba_tbl); - } -} diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c index e909b390069a..605812bb0b1a 100644 --- a/drivers/mtd/ubi/gluebi.c +++ b/drivers/mtd/ubi/gluebi.c @@ -111,7 +111,7 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, struct ubi_device *ubi; uint64_t tmp = from; - dbg_msg("read %zd bytes from offset %lld", len, from); + dbg_gen("read %zd bytes from offset %lld", len, from); if (len < 0 || from < 0 || from + len > mtd->size) return -EINVAL; @@ -162,7 +162,7 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, struct ubi_device *ubi; uint64_t tmp = to; - dbg_msg("write %zd bytes to offset %lld", len, to); + dbg_gen("write %zd bytes to offset %lld", len, to); if (len < 0 || to < 0 || len + to > mtd->size) return -EINVAL; @@ -215,7 +215,7 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr) struct ubi_volume *vol; struct ubi_device *ubi; - dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr); + dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr); if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) return -EINVAL; @@ -249,8 +249,8 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr) if (err) goto out_err; - instr->state = MTD_ERASE_DONE; - mtd_erase_callback(instr); + instr->state = MTD_ERASE_DONE; + mtd_erase_callback(instr); return 0; out_err: @@ -299,12 +299,12 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol) mtd->size = vol->used_bytes; if (add_mtd_device(mtd)) { - ubi_err("cannot not add MTD device\n"); + ubi_err("cannot not add MTD device"); kfree(mtd->name); return -ENFILE; } - dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u", + dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u", mtd->index, mtd->name, mtd->size, mtd->erasesize); return 0; } @@ -322,7 +322,7 @@ int ubi_destroy_gluebi(struct ubi_volume *vol) int err; struct mtd_info *mtd = &vol->gluebi_mtd; - dbg_msg("remove mtd%d", mtd->index); + dbg_gen("remove mtd%d", mtd->index); err = del_mtd_device(mtd); if (err) return err; diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 4ac11df7b048..2fb64be44f1b 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -20,15 +20,15 @@ */ /* - * UBI input/output unit. + * UBI input/output sub-system. * - * This unit provides a uniform way to work with all kinds of the underlying - * MTD devices. It also implements handy functions for reading and writing UBI - * headers. + * This sub-system provides a uniform way to work with all kinds of the + * underlying MTD devices. It also implements handy functions for reading and + * writing UBI headers. * * We are trying to have a paranoid mindset and not to trust to what we read - * from the flash media in order to be more secure and robust. So this unit - * validates every single header it reads from the flash media. + * from the flash media in order to be more secure and robust. So this + * sub-system validates every single header it reads from the flash media. * * Some words about how the eraseblock headers are stored. * @@ -79,11 +79,11 @@ * 512-byte chunks, we have to allocate one more buffer and copy our VID header * to offset 448 of this buffer. * - * The I/O unit does the following trick in order to avoid this extra copy. - * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header - * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the - * VID header is being written out, it shifts the VID header pointer back and - * writes the whole sub-page. + * The I/O sub-system does the following trick in order to avoid this extra + * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID + * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. + * When the VID header is being written out, it shifts the VID header pointer + * back and writes the whole sub-page. */ #include <linux/crc32.h> @@ -156,15 +156,19 @@ retry: /* * -EUCLEAN is reported if there was a bit-flip which * was corrected, so this is harmless. + * + * We do not report about it here unless debugging is + * enabled. A corresponding message will be printed + * later, when it is has been scrubbed. */ - ubi_msg("fixable bit-flip detected at PEB %d", pnum); + dbg_msg("fixable bit-flip detected at PEB %d", pnum); ubi_assert(len == read); return UBI_IO_BITFLIPS; } if (read != len && retries++ < UBI_IO_RETRIES) { - dbg_io("error %d while reading %d bytes from PEB %d:%d, " - "read only %zd bytes, retry", + dbg_io("error %d while reading %d bytes from PEB %d:%d," + " read only %zd bytes, retry", err, len, pnum, offset, read); yield(); goto retry; @@ -187,7 +191,7 @@ retry: ubi_assert(len == read); if (ubi_dbg_is_bitflip()) { - dbg_msg("bit-flip (emulated)"); + dbg_gen("bit-flip (emulated)"); err = UBI_IO_BITFLIPS; } } @@ -391,6 +395,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum) { int err, i, patt_count; + ubi_msg("run torture test for PEB %d", pnum); patt_count = ARRAY_SIZE(patterns); ubi_assert(patt_count > 0); @@ -434,6 +439,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum) } err = patt_count; + ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum); out: mutex_unlock(&ubi->buf_mutex); @@ -699,8 +705,8 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, if (hdr_crc != crc) { if (verbose) { - ubi_warn("bad EC header CRC at PEB %d, calculated %#08x," - " read %#08x", pnum, crc, hdr_crc); + ubi_warn("bad EC header CRC at PEB %d, calculated " + "%#08x, read %#08x", pnum, crc, hdr_crc); ubi_dbg_dump_ec_hdr(ec_hdr); } return UBI_IO_BAD_EC_HDR; @@ -1095,8 +1101,7 @@ fail: } /** - * paranoid_check_peb_ec_hdr - check that the erase counter header of a - * physical eraseblock is in-place and is all right. + * paranoid_check_peb_ec_hdr - check erase counter header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @@ -1174,8 +1179,7 @@ fail: } /** - * paranoid_check_peb_vid_hdr - check that the volume identifier header of a - * physical eraseblock is in-place and is all right. + * paranoid_check_peb_vid_hdr - check volume identifier header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @@ -1256,7 +1260,7 @@ static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset, fail: ubi_err("paranoid check failed for PEB %d", pnum); - dbg_msg("hex dump of the %d-%d region", offset, offset + len); + ubi_msg("hex dump of the %d-%d region", offset, offset + len); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ubi->dbg_peb_buf, len, 1); err = 1; diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index a70d58823f8d..5d9bcf109c13 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -106,7 +106,7 @@ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode) struct ubi_device *ubi; struct ubi_volume *vol; - dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode); + dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode); if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) return ERR_PTR(-EINVAL); @@ -215,7 +215,7 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, struct ubi_device *ubi; struct ubi_volume_desc *ret; - dbg_msg("open volume %s, mode %d", name, mode); + dbg_gen("open volume %s, mode %d", name, mode); if (!name) return ERR_PTR(-EINVAL); @@ -266,7 +266,7 @@ void ubi_close_volume(struct ubi_volume_desc *desc) struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; - dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode); + dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode); spin_lock(&ubi->volumes_lock); switch (desc->mode) { @@ -323,7 +323,7 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, struct ubi_device *ubi = vol->ubi; int err, vol_id = vol->vol_id; - dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); + dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 || lnum >= vol->used_ebs || offset < 0 || len < 0 || @@ -388,7 +388,7 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, struct ubi_device *ubi = vol->ubi; int vol_id = vol->vol_id; - dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); + dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); if (vol_id < 0 || vol_id >= ubi->vtbl_slots) return -EINVAL; @@ -397,8 +397,8 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, return -EROFS; if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 || - offset + len > vol->usable_leb_size || offset % ubi->min_io_size || - len % ubi->min_io_size) + offset + len > vol->usable_leb_size || + offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1)) return -EINVAL; if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && @@ -438,7 +438,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, struct ubi_device *ubi = vol->ubi; int vol_id = vol->vol_id; - dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); + dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); if (vol_id < 0 || vol_id >= ubi->vtbl_slots) return -EINVAL; @@ -447,7 +447,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, return -EROFS; if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 || - len > vol->usable_leb_size || len % ubi->min_io_size) + len > vol->usable_leb_size || len & (ubi->min_io_size - 1)) return -EINVAL; if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && @@ -482,7 +482,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum) struct ubi_device *ubi = vol->ubi; int err; - dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); + dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) return -EROFS; @@ -542,7 +542,7 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum) struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; - dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); + dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) return -EROFS; @@ -579,7 +579,7 @@ int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; - dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); + dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) return -EROFS; @@ -621,7 +621,7 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) { struct ubi_volume *vol = desc->vol; - dbg_msg("test LEB %d:%d", vol->vol_id, lnum); + dbg_gen("test LEB %d:%d", vol->vol_id, lnum); if (lnum < 0 || lnum >= vol->reserved_pebs) return -EINVAL; @@ -632,3 +632,27 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) return vol->eba_tbl[lnum] >= 0; } EXPORT_SYMBOL_GPL(ubi_is_mapped); + +/** + * ubi_sync - synchronize UBI device buffers. + * @ubi_num: UBI device to synchronize + * + * The underlying MTD device may cache data in hardware or in software. This + * function ensures the caches are flushed. Returns zero in case of success and + * a negative error code in case of failure. + */ +int ubi_sync(int ubi_num) +{ + struct ubi_device *ubi; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + + if (ubi->mtd->sync) + ubi->mtd->sync(ubi->mtd); + + ubi_put_device(ubi); + return 0; +} +EXPORT_SYMBOL_GPL(ubi_sync); diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c index 93e052812012..22ad31402945 100644 --- a/drivers/mtd/ubi/misc.c +++ b/drivers/mtd/ubi/misc.c @@ -37,7 +37,7 @@ int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, { int i; - ubi_assert(length % ubi->min_io_size == 0); + ubi_assert(!(length & (ubi->min_io_size - 1))); for (i = length - 1; i >= 0; i--) if (((const uint8_t *)buf)[i] != 0xFF) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 96d410e106ab..967bb4406df9 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -19,9 +19,9 @@ */ /* - * UBI scanning unit. + * UBI scanning sub-system. * - * This unit is responsible for scanning the flash media, checking UBI + * This sub-system is responsible for scanning the flash media, checking UBI * headers and providing complete information about the UBI flash image. * * The scanning information is represented by a &struct ubi_scan_info' object. @@ -93,8 +93,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, } /** - * validate_vid_hdr - check that volume identifier header is correct and - * consistent. + * validate_vid_hdr - check volume identifier header. * @vid_hdr: the volume identifier header to check * @sv: information about the volume this logical eraseblock belongs to * @pnum: physical eraseblock number the VID header came from @@ -103,7 +102,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, * non-zero if an inconsistency was found and zero if not. * * Note, UBI does sanity check of everything it reads from the flash media. - * Most of the checks are done in the I/O unit. Here we check that the + * Most of the checks are done in the I/O sub-system. Here we check that the * information in the VID header is consistent to the information in other VID * headers of the same volume. */ @@ -247,45 +246,21 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, struct ubi_vid_hdr *vh = NULL; unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); - if (seb->sqnum == 0 && sqnum2 == 0) { - long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver); - + if (sqnum2 == seb->sqnum) { /* - * UBI constantly increases the logical eraseblock version - * number and it can overflow. Thus, we have to bear in mind - * that versions that are close to %0xFFFFFFFF are less then - * versions that are close to %0. - * - * The UBI WL unit guarantees that the number of pending tasks - * is not greater then %0x7FFFFFFF. So, if the difference - * between any two versions is greater or equivalent to - * %0x7FFFFFFF, there was an overflow and the logical - * eraseblock with lower version is actually newer then the one - * with higher version. - * - * FIXME: but this is anyway obsolete and will be removed at - * some point. + * This must be a really ancient UBI image which has been + * created before sequence numbers support has been added. At + * that times we used 32-bit LEB versions stored in logical + * eraseblocks. That was before UBI got into mainline. We do not + * support these images anymore. Well, those images will work + * still work, but only if no unclean reboots happened. */ - dbg_bld("using old crappy leb_ver stuff"); - - if (v1 == v2) { - ubi_err("PEB %d and PEB %d have the same version %lld", - seb->pnum, pnum, v1); - return -EINVAL; - } - - abs = v1 - v2; - if (abs < 0) - abs = -abs; + ubi_err("unsupported on-flash UBI format\n"); + return -EINVAL; + } - if (abs < 0x7FFFFFFF) - /* Non-overflow situation */ - second_is_newer = (v2 > v1); - else - second_is_newer = (v2 < v1); - } else - /* Obviously the LEB with lower sequence counter is older */ - second_is_newer = sqnum2 > seb->sqnum; + /* Obviously the LEB with lower sequence counter is older */ + second_is_newer = !!(sqnum2 > seb->sqnum); /* * Now we know which copy is newer. If the copy flag of the PEB with @@ -293,7 +268,7 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, * check data CRC. For the second PEB we already have the VID header, * for the first one - we'll need to re-read it from flash. * - * FIXME: this may be optimized so that we wouldn't read twice. + * Note: this may be optimized so that we wouldn't read twice. */ if (second_is_newer) { @@ -379,8 +354,7 @@ out_free_vidh: } /** - * ubi_scan_add_used - add information about a physical eraseblock to the - * scanning information. + * ubi_scan_add_used - add physical eraseblock to the scanning information. * @ubi: UBI device description object * @si: scanning information * @pnum: the physical eraseblock number @@ -400,7 +374,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, int bitflips) { int err, vol_id, lnum; - uint32_t leb_ver; unsigned long long sqnum; struct ubi_scan_volume *sv; struct ubi_scan_leb *seb; @@ -409,10 +382,9 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, vol_id = be32_to_cpu(vid_hdr->vol_id); lnum = be32_to_cpu(vid_hdr->lnum); sqnum = be64_to_cpu(vid_hdr->sqnum); - leb_ver = be32_to_cpu(vid_hdr->leb_ver); - dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", - pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); + dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d", + pnum, vol_id, lnum, ec, sqnum, bitflips); sv = add_volume(si, vol_id, pnum, vid_hdr); if (IS_ERR(sv) < 0) @@ -445,25 +417,20 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, */ dbg_bld("this LEB already exists: PEB %d, sqnum %llu, " - "LEB ver %u, EC %d", seb->pnum, seb->sqnum, - seb->leb_ver, seb->ec); - - /* - * Make sure that the logical eraseblocks have different - * versions. Otherwise the image is bad. - */ - if (seb->leb_ver == leb_ver && leb_ver != 0) { - ubi_err("two LEBs with same version %u", leb_ver); - ubi_dbg_dump_seb(seb, 0); - ubi_dbg_dump_vid_hdr(vid_hdr); - return -EINVAL; - } + "EC %d", seb->pnum, seb->sqnum, seb->ec); /* * Make sure that the logical eraseblocks have different * sequence numbers. Otherwise the image is bad. * - * FIXME: remove 'sqnum != 0' check when leb_ver is removed. + * However, if the sequence number is zero, we assume it must + * be an ancient UBI image from the era when UBI did not have + * sequence numbers. We still can attach these images, unless + * there is a need to distinguish between old and new + * eraseblocks, in which case we'll refuse the image in + * 'compare_lebs()'. In other words, we attach old clean + * images, but refuse attaching old images with duplicated + * logical eraseblocks because there was an unclean reboot. */ if (seb->sqnum == sqnum && sqnum != 0) { ubi_err("two LEBs with same sequence number %llu", @@ -503,7 +470,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, seb->pnum = pnum; seb->scrub = ((cmp_res & 2) || bitflips); seb->sqnum = sqnum; - seb->leb_ver = leb_ver; if (sv->highest_lnum == lnum) sv->last_data_size = @@ -540,7 +506,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, seb->lnum = lnum; seb->sqnum = sqnum; seb->scrub = bitflips; - seb->leb_ver = leb_ver; if (sv->highest_lnum <= lnum) { sv->highest_lnum = lnum; @@ -554,8 +519,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, } /** - * ubi_scan_find_sv - find information about a particular volume in the - * scanning information. + * ubi_scan_find_sv - find volume in the scanning information. * @si: scanning information * @vol_id: the requested volume ID * @@ -584,8 +548,7 @@ struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, } /** - * ubi_scan_find_seb - find information about a particular logical - * eraseblock in the volume scanning information. + * ubi_scan_find_seb - find LEB in the volume scanning information. * @sv: a pointer to the volume scanning information * @lnum: the requested logical eraseblock * @@ -645,9 +608,9 @@ void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv) * * This function erases physical eraseblock 'pnum', and writes the erase * counter header to it. This function should only be used on UBI device - * initialization stages, when the EBA unit had not been yet initialized. This - * function returns zero in case of success and a negative error code in case - * of failure. + * initialization stages, when the EBA sub-system had not been yet initialized. + * This function returns zero in case of success and a negative error code in + * case of failure. */ int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, int pnum, int ec) @@ -687,9 +650,10 @@ out_free: * @si: scanning information * * This function returns a free physical eraseblock. It is supposed to be - * called on the UBI initialization stages when the wear-leveling unit is not - * initialized yet. This function picks a physical eraseblocks from one of the - * lists, writes the EC header if it is needed, and removes it from the list. + * called on the UBI initialization stages when the wear-leveling sub-system is + * not initialized yet. This function picks a physical eraseblocks from one of + * the lists, writes the EC header if it is needed, and removes it from the + * list. * * This function returns scanning physical eraseblock information in case of * success and an error code in case of failure. @@ -742,8 +706,7 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, } /** - * process_eb - read UBI headers, check them and add corresponding data - * to the scanning information. + * process_eb - read, check UBI headers, and add them to scanning information. * @ubi: UBI device description object * @si: scanning information * @pnum: the physical eraseblock number @@ -751,7 +714,8 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, * This function returns a zero if the physical eraseblock was successfully * handled and a negative error code in case of failure. */ -static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) +static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, + int pnum) { long long uninitialized_var(ec); int err, bitflips = 0, vol_id, ec_corr = 0; @@ -764,8 +728,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum return err; else if (err) { /* - * FIXME: this is actually duty of the I/O unit to initialize - * this, but MTD does not provide enough information. + * FIXME: this is actually duty of the I/O sub-system to + * initialize this, but MTD does not provide enough + * information. */ si->bad_peb_count += 1; return 0; @@ -930,7 +895,7 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) for (pnum = 0; pnum < ubi->peb_count; pnum++) { cond_resched(); - dbg_msg("process PEB %d", pnum); + dbg_gen("process PEB %d", pnum); err = process_eb(ubi, si, pnum); if (err < 0) goto out_vidh; @@ -1079,8 +1044,7 @@ void ubi_scan_destroy_si(struct ubi_scan_info *si) #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID /** - * paranoid_check_si - check if the scanning information is correct and - * consistent. + * paranoid_check_si - check the scanning information. * @ubi: UBI device description object * @si: scanning information * @@ -1265,11 +1229,6 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) ubi_err("bad data_pad %d", sv->data_pad); goto bad_vid_hdr; } - - if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) { - ubi_err("bad leb_ver %u", seb->leb_ver); - goto bad_vid_hdr; - } } if (!last_seb) @@ -1299,8 +1258,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) if (err < 0) { kfree(buf); return err; - } - else if (err) + } else if (err) buf[pnum] = 1; } diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h index 966b9b682a42..61df208e2f20 100644 --- a/drivers/mtd/ubi/scan.h +++ b/drivers/mtd/ubi/scan.h @@ -34,7 +34,6 @@ * @u: unions RB-tree or @list links * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects * @u.list: link in one of the eraseblock lists - * @leb_ver: logical eraseblock version (obsolete) * * One object of this type is allocated for each physical eraseblock during * scanning. @@ -49,7 +48,6 @@ struct ubi_scan_leb { struct rb_node rb; struct list_head list; } u; - uint32_t leb_ver; }; /** @@ -59,16 +57,16 @@ struct ubi_scan_leb { * @leb_count: number of logical eraseblocks in this volume * @vol_type: volume type * @used_ebs: number of used logical eraseblocks in this volume (only for - * static volumes) + * static volumes) * @last_data_size: amount of data in the last logical eraseblock of this - * volume (always equivalent to the usable logical eraseblock size in case of - * dynamic volumes) + * volume (always equivalent to the usable logical eraseblock + * size in case of dynamic volumes) * @data_pad: how many bytes at the end of logical eraseblocks of this volume - * are not used (due to volume alignment) + * are not used (due to volume alignment) * @compat: compatibility flags of this volume * @rb: link in the volume RB-tree * @root: root of the RB-tree containing all the eraseblock belonging to this - * volume (&struct ubi_scan_leb objects) + * volume (&struct ubi_scan_leb objects) * * One object of this type is allocated for each volume during scanning. */ @@ -92,8 +90,8 @@ struct ubi_scan_volume { * @free: list of free physical eraseblocks * @erase: list of physical eraseblocks which have to be erased * @alien: list of physical eraseblocks which should not be used by UBI (e.g., + * those belonging to "preserve"-compatible internal volumes) * @bad_peb_count: count of bad physical eraseblocks - * those belonging to "preserve"-compatible internal volumes) * @vols_found: number of volumes found during scanning * @highest_vol_id: highest volume ID * @alien_peb_count: count of physical eraseblocks in the @alien list @@ -106,8 +104,8 @@ struct ubi_scan_volume { * @ec_count: a temporary variable used when calculating @mean_ec * * This data structure contains the result of scanning and may be used by other - * UBI units to build final UBI data structures, further error-recovery and so - * on. + * UBI sub-systems to build final UBI data structures, further error-recovery + * and so on. */ struct ubi_scan_info { struct rb_root volumes; @@ -132,8 +130,7 @@ struct ubi_device; struct ubi_vid_hdr; /* - * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a - * list. + * ubi_scan_move_to_list - move a PEB from the volume tree to a list. * * @sv: volume scanning information * @seb: scanning eraseblock infprmation diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h index c3185d9fd048..2ad940409053 100644 --- a/drivers/mtd/ubi/ubi-media.h +++ b/drivers/mtd/ubi/ubi-media.h @@ -98,10 +98,11 @@ enum { * Compatibility constants used by internal volumes. * * @UBI_COMPAT_DELETE: delete this internal volume before anything is written - * to the flash + * to the flash * @UBI_COMPAT_RO: attach this device in read-only mode * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its - * physical eraseblocks, don't allow the wear-leveling unit to move them + * physical eraseblocks, don't allow the wear-leveling + * sub-system to move them * @UBI_COMPAT_REJECT: reject this UBI image */ enum { @@ -123,7 +124,7 @@ enum { * struct ubi_ec_hdr - UBI erase counter header. * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) * @version: version of UBI implementation which is supposed to accept this - * UBI image + * UBI image * @padding1: reserved for future, zeroes * @ec: the erase counter * @vid_hdr_offset: where the VID header starts @@ -159,24 +160,23 @@ struct ubi_ec_hdr { * struct ubi_vid_hdr - on-flash UBI volume identifier header. * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) * @version: UBI implementation version which is supposed to accept this UBI - * image (%UBI_VERSION) + * image (%UBI_VERSION) * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) * @copy_flag: if this logical eraseblock was copied from another physical - * eraseblock (for wear-leveling reasons) + * eraseblock (for wear-leveling reasons) * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, - * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) + * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) * @vol_id: ID of this volume * @lnum: logical eraseblock number - * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be - * removed, kept only for not breaking older UBI users) + * @padding1: reserved for future, zeroes * @data_size: how many bytes of data this logical eraseblock contains * @used_ebs: total number of used logical eraseblocks in this volume * @data_pad: how many bytes at the end of this physical eraseblock are not - * used + * used * @data_crc: CRC checksum of the data stored in this logical eraseblock - * @padding1: reserved for future, zeroes - * @sqnum: sequence number * @padding2: reserved for future, zeroes + * @sqnum: sequence number + * @padding3: reserved for future, zeroes * @hdr_crc: volume identifier header CRC checksum * * The @sqnum is the value of the global sequence counter at the time when this @@ -224,10 +224,6 @@ struct ubi_ec_hdr { * checksum is correct, this physical eraseblock is selected (P1). Otherwise * the older one (P) is selected. * - * Note, there is an obsolete @leb_ver field which was used instead of @sqnum - * in the past. But it is not used anymore and we keep it in order to be able - * to deal with old UBI images. It will be removed at some point. - * * There are 2 sorts of volumes in UBI: user volumes and internal volumes. * Internal volumes are not seen from outside and are used for various internal * UBI purposes. In this implementation there is only one internal volume - the @@ -248,9 +244,9 @@ struct ubi_ec_hdr { * The @data_crc field contains the CRC checksum of the contents of the logical * eraseblock if this is a static volume. In case of dynamic volumes, it does * not contain the CRC checksum as a rule. The only exception is when the - * data of the physical eraseblock was moved by the wear-leveling unit, then - * the wear-leveling unit calculates the data CRC and stores it in the - * @data_crc field. And of course, the @copy_flag is %in this case. + * data of the physical eraseblock was moved by the wear-leveling sub-system, + * then the wear-leveling sub-system calculates the data CRC and stores it in + * the @data_crc field. And of course, the @copy_flag is %in this case. * * The @data_size field is used only for static volumes because UBI has to know * how many bytes of data are stored in this eraseblock. For dynamic volumes, @@ -277,14 +273,14 @@ struct ubi_vid_hdr { __u8 compat; __be32 vol_id; __be32 lnum; - __be32 leb_ver; /* obsolete, to be removed, don't use */ + __u8 padding1[4]; __be32 data_size; __be32 used_ebs; __be32 data_pad; __be32 data_crc; - __u8 padding1[4]; + __u8 padding2[4]; __be64 sqnum; - __u8 padding2[12]; + __u8 padding3[12]; __be32 hdr_crc; } __attribute__ ((packed)); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 67dcbd11c15c..1c3fa18c26a7 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -74,15 +74,15 @@ #define UBI_IO_RETRIES 3 /* - * Error codes returned by the I/O unit. + * Error codes returned by the I/O sub-system. * * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only - * 0xFF bytes + * %0xFF bytes * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a - * valid erase counter header, and the rest are %0xFF bytes + * valid erase counter header, and the rest are %0xFF bytes * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or - * CRC) + * CRC) * UBI_IO_BITFLIPS: bit-flips were detected and corrected */ enum { @@ -99,9 +99,9 @@ enum { * @ec: erase counter * @pnum: physical eraseblock number * - * This data structure is used in the WL unit. Each physical eraseblock has a - * corresponding &struct wl_entry object which may be kept in different - * RB-trees. See WL unit for details. + * This data structure is used in the WL sub-system. Each physical eraseblock + * has a corresponding &struct wl_entry object which may be kept in different + * RB-trees. See WL sub-system for details. */ struct ubi_wl_entry { struct rb_node rb; @@ -118,10 +118,10 @@ struct ubi_wl_entry { * @mutex: read/write mutex to implement read/write access serialization to * the (@vol_id, @lnum) logical eraseblock * - * This data structure is used in the EBA unit to implement per-LEB locking. - * When a logical eraseblock is being locked - corresponding + * This data structure is used in the EBA sub-system to implement per-LEB + * locking. When a logical eraseblock is being locked - corresponding * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree). - * See EBA unit for details. + * See EBA sub-system for details. */ struct ubi_ltree_entry { struct rb_node rb; @@ -131,6 +131,27 @@ struct ubi_ltree_entry { struct rw_semaphore mutex; }; +/** + * struct ubi_rename_entry - volume re-name description data structure. + * @new_name_len: new volume name length + * @new_name: new volume name + * @remove: if not zero, this volume should be removed, not re-named + * @desc: descriptor of the volume + * @list: links re-name entries into a list + * + * This data structure is utilized in the multiple volume re-name code. Namely, + * UBI first creates a list of &struct ubi_rename_entry objects from the + * &struct ubi_rnvol_req request object, and then utilizes this list to do all + * the job. + */ +struct ubi_rename_entry { + int new_name_len; + char new_name[UBI_VOL_NAME_MAX + 1]; + int remove; + struct ubi_volume_desc *desc; + struct list_head list; +}; + struct ubi_volume_desc; /** @@ -206,7 +227,7 @@ struct ubi_volume { int alignment; int data_pad; int name_len; - char name[UBI_VOL_NAME_MAX+1]; + char name[UBI_VOL_NAME_MAX + 1]; int upd_ebs; int ch_lnum; @@ -225,7 +246,7 @@ struct ubi_volume { #ifdef CONFIG_MTD_UBI_GLUEBI /* * Gluebi-related stuff may be compiled out. - * TODO: this should not be built into UBI but should be a separate + * Note: this should not be built into UBI but should be a separate * ubimtd driver which works on top of UBI and emulates MTD devices. */ struct ubi_volume_desc *gluebi_desc; @@ -235,8 +256,7 @@ struct ubi_volume { }; /** - * struct ubi_volume_desc - descriptor of the UBI volume returned when it is - * opened. + * struct ubi_volume_desc - UBI volume descriptor returned when it is opened. * @vol: reference to the corresponding volume description object * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) */ @@ -273,7 +293,7 @@ struct ubi_wl_entry; * @vtbl_size: size of the volume table in bytes * @vtbl: in-RAM volume table copy * @volumes_mutex: protects on-flash volume table and serializes volume - * changes, like creation, deletion, update, resize + * changes, like creation, deletion, update, re-size and re-name * * @max_ec: current highest erase counter value * @mean_ec: current mean erase counter value @@ -293,6 +313,7 @@ struct ubi_wl_entry; * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works * fields * @move_mutex: serializes eraseblock moves + * @work_sem: sycnhronizes the WL worker with use tasks * @wl_scheduled: non-zero if the wear-leveling was scheduled * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any * physical eraseblock @@ -316,11 +337,11 @@ struct ubi_wl_entry; * @ro_mode: if the UBI device is in read-only mode * @leb_size: logical eraseblock size * @leb_start: starting offset of logical eraseblocks within physical - * eraseblocks + * eraseblocks * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size * @vid_hdr_offset: starting offset of the volume identifier header (might be - * unaligned) + * unaligned) * @vid_hdr_aloffset: starting offset of the VID header aligned to * @hdrs_min_io_size * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset @@ -331,6 +352,8 @@ struct ubi_wl_entry; * @peb_buf1: a buffer of PEB size used for different purposes * @peb_buf2: another buffer of PEB size used for different purposes * @buf_mutex: proptects @peb_buf1 and @peb_buf2 + * @ckvol_mutex: serializes static volume checking when opening + * @mult_mutex: serializes operations on multiple volumes, like re-nameing * @dbg_peb_buf: buffer of PEB size used for debugging * @dbg_buf_mutex: proptects @dbg_peb_buf */ @@ -356,16 +379,16 @@ struct ubi_device { struct mutex volumes_mutex; int max_ec; - /* TODO: mean_ec is not updated run-time, fix */ + /* Note, mean_ec is not updated run-time - should be fixed */ int mean_ec; - /* EBA unit's stuff */ + /* EBA sub-system's stuff */ unsigned long long global_sqnum; spinlock_t ltree_lock; struct rb_root ltree; struct mutex alc_mutex; - /* Wear-leveling unit's stuff */ + /* Wear-leveling sub-system's stuff */ struct rb_root used; struct rb_root free; struct rb_root scrub; @@ -388,7 +411,7 @@ struct ubi_device { int thread_enabled; char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; - /* I/O unit's stuff */ + /* I/O sub-system's stuff */ long long flash_size; int peb_count; int peb_size; @@ -411,6 +434,7 @@ struct ubi_device { void *peb_buf2; struct mutex buf_mutex; struct mutex ckvol_mutex; + struct mutex mult_mutex; #ifdef CONFIG_MTD_UBI_DEBUG void *dbg_peb_buf; struct mutex dbg_buf_mutex; @@ -427,12 +451,15 @@ extern struct mutex ubi_devices_mutex; /* vtbl.c */ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, struct ubi_vtbl_record *vtbl_rec); +int ubi_vtbl_rename_volumes(struct ubi_device *ubi, + struct list_head *rename_list); int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); /* vmt.c */ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); -int ubi_remove_volume(struct ubi_volume_desc *desc); +int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl); int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); +int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list); int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol); void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol); @@ -447,7 +474,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, const void __user *buf, int count); /* misc.c */ -int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, + int length); int ubi_check_volume(struct ubi_device *ubi, int vol_id); void ubi_calculate_reserved(struct ubi_device *ubi); @@ -477,7 +505,6 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, struct ubi_vid_hdr *vid_hdr); int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); -void ubi_eba_close(const struct ubi_device *ubi); /* wl.c */ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype); diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index ddaa1a56cc69..8b89cc18ff0b 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -39,7 +39,7 @@ */ #include <linux/err.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/div64.h> #include "ubi.h" @@ -56,11 +56,11 @@ static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) int err; struct ubi_vtbl_record vtbl_rec; - dbg_msg("set update marker for volume %d", vol->vol_id); + dbg_gen("set update marker for volume %d", vol->vol_id); if (vol->upd_marker) { ubi_assert(ubi->vtbl[vol->vol_id].upd_marker); - dbg_msg("already set"); + dbg_gen("already set"); return 0; } @@ -92,7 +92,7 @@ static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, uint64_t tmp; struct ubi_vtbl_record vtbl_rec; - dbg_msg("clear update marker for volume %d", vol->vol_id); + dbg_gen("clear update marker for volume %d", vol->vol_id); memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], sizeof(struct ubi_vtbl_record)); @@ -133,7 +133,7 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, int i, err; uint64_t tmp; - dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes); + dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes); ubi_assert(!vol->updating && !vol->changing_leb); vol->updating = 1; @@ -183,7 +183,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, { ubi_assert(!vol->updating && !vol->changing_leb); - dbg_msg("start changing LEB %d:%d, %u bytes", + dbg_gen("start changing LEB %d:%d, %u bytes", vol->vol_id, req->lnum, req->bytes); if (req->bytes == 0) return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0, @@ -237,16 +237,17 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, int err; if (vol->vol_type == UBI_DYNAMIC_VOLUME) { - len = ALIGN(len, ubi->min_io_size); - memset(buf + len, 0xFF, len - len); + int l = ALIGN(len, ubi->min_io_size); - len = ubi_calc_data_len(ubi, buf, len); + memset(buf + len, 0xFF, l - len); + len = ubi_calc_data_len(ubi, buf, l); if (len == 0) { - dbg_msg("all %d bytes contain 0xFF - skip", len); + dbg_gen("all %d bytes contain 0xFF - skip", len); return 0; } - err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN); + err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, + UBI_UNKNOWN); } else { /* * When writing static volume, and this is the last logical @@ -267,6 +268,7 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, /** * ubi_more_update_data - write more update data. + * @ubi: UBI device description object * @vol: volume description object * @buf: write data (user-space memory buffer) * @count: how much bytes to write @@ -283,7 +285,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, uint64_t tmp; int lnum, offs, err = 0, len, to_write = count; - dbg_msg("write %d of %lld bytes, %lld already passed", + dbg_gen("write %d of %lld bytes, %lld already passed", count, vol->upd_bytes, vol->upd_received); if (ubi->ro_mode) @@ -384,6 +386,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, /** * ubi_more_leb_change_data - accept more data for atomic LEB change. + * @ubi: UBI device description object * @vol: volume description object * @buf: write data (user-space memory buffer) * @count: how much bytes to write @@ -400,7 +403,7 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, { int err; - dbg_msg("write %d of %lld bytes, %lld already passed", + dbg_gen("write %d of %lld bytes, %lld already passed", count, vol->upd_bytes, vol->upd_received); if (ubi->ro_mode) @@ -418,7 +421,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, if (vol->upd_received == vol->upd_bytes) { int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); - memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes); + memset(vol->upd_buf + vol->upd_bytes, 0xFF, + len - vol->upd_bytes); len = ubi_calc_data_len(ubi, vol->upd_buf, len); err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, vol->upd_buf, len, UBI_UNKNOWN); diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 5be58d85c639..3531ca9a1e24 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -28,9 +28,9 @@ #include "ubi.h" #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID -static void paranoid_check_volumes(struct ubi_device *ubi); +static int paranoid_check_volumes(struct ubi_device *ubi); #else -#define paranoid_check_volumes(ubi) +#define paranoid_check_volumes(ubi) 0 #endif static ssize_t vol_attribute_show(struct device *dev, @@ -127,6 +127,7 @@ static void vol_release(struct device *dev) { struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + kfree(vol->eba_tbl); kfree(vol); } @@ -201,7 +202,7 @@ static void volume_sysfs_close(struct ubi_volume *vol) */ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) { - int i, err, vol_id = req->vol_id, dont_free = 0; + int i, err, vol_id = req->vol_id, do_free = 1; struct ubi_volume *vol; struct ubi_vtbl_record vtbl_rec; uint64_t bytes; @@ -217,7 +218,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) spin_lock(&ubi->volumes_lock); if (vol_id == UBI_VOL_NUM_AUTO) { /* Find unused volume ID */ - dbg_msg("search for vacant volume ID"); + dbg_gen("search for vacant volume ID"); for (i = 0; i < ubi->vtbl_slots; i++) if (!ubi->volumes[i]) { vol_id = i; @@ -232,7 +233,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) req->vol_id = vol_id; } - dbg_msg("volume ID %d, %llu bytes, type %d, name %s", + dbg_gen("volume ID %d, %llu bytes, type %d, name %s", vol_id, (unsigned long long)req->bytes, (int)req->vol_type, req->name); @@ -252,7 +253,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) goto out_unlock; } - /* Calculate how many eraseblocks are requested */ + /* Calculate how many eraseblocks are requested */ vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; bytes = req->bytes; if (do_div(bytes, vol->usable_leb_size)) @@ -274,7 +275,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) vol->data_pad = ubi->leb_size % vol->alignment; vol->vol_type = req->vol_type; vol->name_len = req->name_len; - memcpy(vol->name, req->name, vol->name_len + 1); + memcpy(vol->name, req->name, vol->name_len); vol->ubi = ubi; /* @@ -349,7 +350,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) vtbl_rec.vol_type = UBI_VID_DYNAMIC; else vtbl_rec.vol_type = UBI_VID_STATIC; - memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); + memcpy(vtbl_rec.name, vol->name, vol->name_len); err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) @@ -360,19 +361,19 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) ubi->vol_count += 1; spin_unlock(&ubi->volumes_lock); - paranoid_check_volumes(ubi); - return 0; + err = paranoid_check_volumes(ubi); + return err; out_sysfs: /* - * We have registered our device, we should not free the volume* + * We have registered our device, we should not free the volume * description object in this function in case of an error - it is * freed by the release function. * * Get device reference to prevent the release function from being * called just after sysfs has been closed. */ - dont_free = 1; + do_free = 0; get_device(&vol->dev); volume_sysfs_close(vol); out_gluebi: @@ -382,17 +383,18 @@ out_gluebi: out_cdev: cdev_del(&vol->cdev); out_mapping: - kfree(vol->eba_tbl); + if (do_free) + kfree(vol->eba_tbl); out_acc: spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs -= vol->reserved_pebs; ubi->avail_pebs += vol->reserved_pebs; out_unlock: spin_unlock(&ubi->volumes_lock); - if (dont_free) - put_device(&vol->dev); - else + if (do_free) kfree(vol); + else + put_device(&vol->dev); ubi_err("cannot create volume %d, error %d", vol_id, err); return err; } @@ -400,19 +402,20 @@ out_unlock: /** * ubi_remove_volume - remove volume. * @desc: volume descriptor + * @no_vtbl: do not change volume table if not zero * * This function removes volume described by @desc. The volume has to be opened * in "exclusive" mode. Returns zero in case of success and a negative error * code in case of failure. The caller has to have the @ubi->volumes_mutex * locked. */ -int ubi_remove_volume(struct ubi_volume_desc *desc) +int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl) { struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; - dbg_msg("remove UBI volume %d", vol_id); + dbg_gen("remove UBI volume %d", vol_id); ubi_assert(desc->mode == UBI_EXCLUSIVE); ubi_assert(vol == ubi->volumes[vol_id]); @@ -435,9 +438,11 @@ int ubi_remove_volume(struct ubi_volume_desc *desc) if (err) goto out_err; - err = ubi_change_vtbl_record(ubi, vol_id, NULL); - if (err) - goto out_err; + if (!no_vtbl) { + err = ubi_change_vtbl_record(ubi, vol_id, NULL); + if (err) + goto out_err; + } for (i = 0; i < vol->reserved_pebs; i++) { err = ubi_eba_unmap_leb(ubi, vol, i); @@ -445,8 +450,6 @@ int ubi_remove_volume(struct ubi_volume_desc *desc) goto out_err; } - kfree(vol->eba_tbl); - vol->eba_tbl = NULL; cdev_del(&vol->cdev); volume_sysfs_close(vol); @@ -465,8 +468,9 @@ int ubi_remove_volume(struct ubi_volume_desc *desc) ubi->vol_count -= 1; spin_unlock(&ubi->volumes_lock); - paranoid_check_volumes(ubi); - return 0; + if (!no_vtbl) + err = paranoid_check_volumes(ubi); + return err; out_err: ubi_err("cannot remove volume %d, error %d", vol_id, err); @@ -497,7 +501,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) if (ubi->ro_mode) return -EROFS; - dbg_msg("re-size volume %d to from %d to %d PEBs", + dbg_gen("re-size volume %d to from %d to %d PEBs", vol_id, vol->reserved_pebs, reserved_pebs); if (vol->vol_type == UBI_STATIC_VOLUME && @@ -586,8 +590,8 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) (long long)vol->used_ebs * vol->usable_leb_size; } - paranoid_check_volumes(ubi); - return 0; + err = paranoid_check_volumes(ubi); + return err; out_acc: if (pebs > 0) { @@ -602,6 +606,44 @@ out_free: } /** + * ubi_rename_volumes - re-name UBI volumes. + * @ubi: UBI device description object + * @rename_list: list of &struct ubi_rename_entry objects + * + * This function re-names or removes volumes specified in the re-name list. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list) +{ + int err; + struct ubi_rename_entry *re; + + err = ubi_vtbl_rename_volumes(ubi, rename_list); + if (err) + return err; + + list_for_each_entry(re, rename_list, list) { + if (re->remove) { + err = ubi_remove_volume(re->desc, 1); + if (err) + break; + } else { + struct ubi_volume *vol = re->desc->vol; + + spin_lock(&ubi->volumes_lock); + vol->name_len = re->new_name_len; + memcpy(vol->name, re->new_name, re->new_name_len + 1); + spin_unlock(&ubi->volumes_lock); + } + } + + if (!err) + err = paranoid_check_volumes(ubi); + return err; +} + +/** * ubi_add_volume - add volume. * @ubi: UBI device description object * @vol: volume description object @@ -615,8 +657,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) int err, vol_id = vol->vol_id; dev_t dev; - dbg_msg("add volume %d", vol_id); - ubi_dbg_dump_vol_info(vol); + dbg_gen("add volume %d", vol_id); /* Register character device for the volume */ cdev_init(&vol->cdev, &ubi_vol_cdev_operations); @@ -650,8 +691,8 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) return err; } - paranoid_check_volumes(ubi); - return 0; + err = paranoid_check_volumes(ubi); + return err; out_gluebi: err = ubi_destroy_gluebi(vol); @@ -672,7 +713,7 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) { int err; - dbg_msg("free volume %d", vol->vol_id); + dbg_gen("free volume %d", vol->vol_id); ubi->volumes[vol->vol_id] = NULL; err = ubi_destroy_gluebi(vol); @@ -686,8 +727,10 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) * paranoid_check_volume - check volume information. * @ubi: UBI device description object * @vol_id: volume ID + * + * Returns zero if volume is all right and a a negative error code if not. */ -static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) +static int paranoid_check_volume(struct ubi_device *ubi, int vol_id) { int idx = vol_id2idx(ubi, vol_id); int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; @@ -705,16 +748,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) goto fail; } spin_unlock(&ubi->volumes_lock); - return; - } - - if (vol->exclusive) { - /* - * The volume may be being created at the moment, do not check - * it (e.g., it may be in the middle of ubi_create_volume(). - */ - spin_unlock(&ubi->volumes_lock); - return; + return 0; } if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || @@ -727,7 +761,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) goto fail; } - n = vol->alignment % ubi->min_io_size; + n = vol->alignment & (ubi->min_io_size - 1); if (vol->alignment != 1 && n) { ubi_err("alignment is not multiple of min I/O unit"); goto fail; @@ -824,31 +858,39 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) if (alignment != vol->alignment || data_pad != vol->data_pad || upd_marker != vol->upd_marker || vol_type != vol->vol_type || - name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { + name_len != vol->name_len || strncmp(name, vol->name, name_len)) { ubi_err("volume info is different"); goto fail; } spin_unlock(&ubi->volumes_lock); - return; + return 0; fail: ubi_err("paranoid check failed for volume %d", vol_id); - ubi_dbg_dump_vol_info(vol); + if (vol) + ubi_dbg_dump_vol_info(vol); ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); spin_unlock(&ubi->volumes_lock); - BUG(); + return -EINVAL; } /** * paranoid_check_volumes - check information about all volumes. * @ubi: UBI device description object + * + * Returns zero if volumes are all right and a a negative error code if not. */ -static void paranoid_check_volumes(struct ubi_device *ubi) +static int paranoid_check_volumes(struct ubi_device *ubi) { - int i; + int i, err = 0; - for (i = 0; i < ubi->vtbl_slots; i++) - paranoid_check_volume(ubi, i); + for (i = 0; i < ubi->vtbl_slots; i++) { + err = paranoid_check_volume(ubi, i); + if (err) + break; + } + + return err; } #endif diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index af36b12be278..217d0e111b2a 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -115,8 +115,58 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, } /** - * vtbl_check - check if volume table is not corrupted and contains sensible - * data. + * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table. + * @ubi: UBI device description object + * @rename_list: list of &struct ubi_rename_entry objects + * + * This function re-names multiple volumes specified in @req in the volume + * table. Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubi_vtbl_rename_volumes(struct ubi_device *ubi, + struct list_head *rename_list) +{ + int i, err; + struct ubi_rename_entry *re; + struct ubi_volume *layout_vol; + + list_for_each_entry(re, rename_list, list) { + uint32_t crc; + struct ubi_volume *vol = re->desc->vol; + struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id]; + + if (re->remove) { + memcpy(vtbl_rec, &empty_vtbl_record, + sizeof(struct ubi_vtbl_record)); + continue; + } + + vtbl_rec->name_len = cpu_to_be16(re->new_name_len); + memcpy(vtbl_rec->name, re->new_name, re->new_name_len); + memset(vtbl_rec->name + re->new_name_len, 0, + UBI_VOL_NAME_MAX + 1 - re->new_name_len); + crc = crc32(UBI_CRC32_INIT, vtbl_rec, + UBI_VTBL_RECORD_SIZE_CRC); + vtbl_rec->crc = cpu_to_be32(crc); + } + + layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + err = ubi_eba_unmap_leb(ubi, layout_vol, i); + if (err) + return err; + + err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, + ubi->vtbl_size, UBI_LONGTERM); + if (err) + return err; + } + + return 0; +} + +/** + * vtbl_check - check if volume table is not corrupted and sensible. * @ubi: UBI device description object * @vtbl: volume table * @@ -127,7 +177,7 @@ static int vtbl_check(const struct ubi_device *ubi, const struct ubi_vtbl_record *vtbl) { int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; - int upd_marker; + int upd_marker, err; uint32_t crc; const char *name; @@ -153,7 +203,7 @@ static int vtbl_check(const struct ubi_device *ubi, if (reserved_pebs == 0) { if (memcmp(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE)) { - dbg_err("bad empty record"); + err = 2; goto bad; } continue; @@ -161,56 +211,57 @@ static int vtbl_check(const struct ubi_device *ubi, if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || name_len < 0) { - dbg_err("negative values"); + err = 3; goto bad; } if (alignment > ubi->leb_size || alignment == 0) { - dbg_err("bad alignment"); + err = 4; goto bad; } - n = alignment % ubi->min_io_size; + n = alignment & (ubi->min_io_size - 1); if (alignment != 1 && n) { - dbg_err("alignment is not multiple of min I/O unit"); + err = 5; goto bad; } n = ubi->leb_size % alignment; if (data_pad != n) { dbg_err("bad data_pad, has to be %d", n); + err = 6; goto bad; } if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { - dbg_err("bad vol_type"); + err = 7; goto bad; } if (upd_marker != 0 && upd_marker != 1) { - dbg_err("bad upd_marker"); + err = 8; goto bad; } if (reserved_pebs > ubi->good_peb_count) { dbg_err("too large reserved_pebs, good PEBs %d", ubi->good_peb_count); + err = 9; goto bad; } if (name_len > UBI_VOL_NAME_MAX) { - dbg_err("too long volume name, max %d", - UBI_VOL_NAME_MAX); + err = 10; goto bad; } if (name[0] == '\0') { - dbg_err("NULL volume name"); + err = 11; goto bad; } if (name_len != strnlen(name, name_len + 1)) { - dbg_err("bad name_len"); + err = 12; goto bad; } } @@ -235,7 +286,7 @@ static int vtbl_check(const struct ubi_device *ubi, return 0; bad: - ubi_err("volume table check failed, record %d", i); + ubi_err("volume table check failed: record %d, error %d", i, err); ubi_dbg_dump_vtbl_record(&vtbl[i], i); return -EINVAL; } @@ -287,7 +338,6 @@ retry: vid_hdr->data_pad = cpu_to_be32(0); vid_hdr->lnum = cpu_to_be32(copy); vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); - vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0); /* The EC header is already there, write the VID header */ err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); @@ -370,7 +420,7 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, * to LEB 0. */ - dbg_msg("check layout volume"); + dbg_gen("check layout volume"); /* Read both LEB 0 and LEB 1 into memory */ ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { @@ -384,7 +434,16 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, ubi->vtbl_size); if (err == UBI_IO_BITFLIPS || err == -EBADMSG) - /* Scrub the PEB later */ + /* + * Scrub the PEB later. Note, -EBADMSG indicates an + * uncorrectable ECC error, but we have our own CRC and + * the data will be checked later. If the data is OK, + * the PEB will be scrubbed (because we set + * seb->scrub). If the data is not OK, the contents of + * the PEB will be recovered from the second copy, and + * seb->scrub will be cleared in + * 'ubi_scan_add_used()'. + */ seb->scrub = 1; else if (err) goto out_free; @@ -400,7 +459,8 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, if (!leb_corrupted[0]) { /* LEB 0 is OK */ if (leb[1]) - leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); + leb_corrupted[1] = memcmp(leb[0], leb[1], + ubi->vtbl_size); if (leb_corrupted[1]) { ubi_warn("volume table copy #2 is corrupted"); err = create_vtbl(ubi, si, 1, leb[0]); @@ -620,30 +680,32 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, static int check_sv(const struct ubi_volume *vol, const struct ubi_scan_volume *sv) { + int err; + if (sv->highest_lnum >= vol->reserved_pebs) { - dbg_err("bad highest_lnum"); + err = 1; goto bad; } if (sv->leb_count > vol->reserved_pebs) { - dbg_err("bad leb_count"); + err = 2; goto bad; } if (sv->vol_type != vol->vol_type) { - dbg_err("bad vol_type"); + err = 3; goto bad; } if (sv->used_ebs > vol->reserved_pebs) { - dbg_err("bad used_ebs"); + err = 4; goto bad; } if (sv->data_pad != vol->data_pad) { - dbg_err("bad data_pad"); + err = 5; goto bad; } return 0; bad: - ubi_err("bad scanning information"); + ubi_err("bad scanning information, error %d", err); ubi_dbg_dump_sv(sv); ubi_dbg_dump_vol_info(vol); return -EINVAL; @@ -672,14 +734,13 @@ static int check_scanning_info(const struct ubi_device *ubi, return -EINVAL; } - if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&& + if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && si->highest_vol_id < UBI_INTERNAL_VOL_START) { ubi_err("too large volume ID %d found by scanning", si->highest_vol_id); return -EINVAL; } - for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { cond_resched(); @@ -717,8 +778,7 @@ static int check_scanning_info(const struct ubi_device *ubi, } /** - * ubi_read_volume_table - read volume table. - * information. + * ubi_read_volume_table - read the volume table. * @ubi: UBI device description object * @si: scanning information * @@ -797,11 +857,10 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) out_free: vfree(ubi->vtbl); - for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) - if (ubi->volumes[i]) { - kfree(ubi->volumes[i]); - ubi->volumes[i] = NULL; - } + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + kfree(ubi->volumes[i]); + ubi->volumes[i] = NULL; + } return err; } diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index a471a491f0ab..05d70937b543 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -19,22 +19,22 @@ */ /* - * UBI wear-leveling unit. + * UBI wear-leveling sub-system. * - * This unit is responsible for wear-leveling. It works in terms of physical - * eraseblocks and erase counters and knows nothing about logical eraseblocks, - * volumes, etc. From this unit's perspective all physical eraseblocks are of - * two types - used and free. Used physical eraseblocks are those that were - * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are - * those that were put by the 'ubi_wl_put_peb()' function. + * This sub-system is responsible for wear-leveling. It works in terms of + * physical* eraseblocks and erase counters and knows nothing about logical + * eraseblocks, volumes, etc. From this sub-system's perspective all physical + * eraseblocks are of two types - used and free. Used physical eraseblocks are + * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical + * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. * * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter - * header. The rest of the physical eraseblock contains only 0xFF bytes. + * header. The rest of the physical eraseblock contains only %0xFF bytes. * - * When physical eraseblocks are returned to the WL unit by means of the + * When physical eraseblocks are returned to the WL sub-system by means of the * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is * done asynchronously in context of the per-UBI device background thread, - * which is also managed by the WL unit. + * which is also managed by the WL sub-system. * * The wear-leveling is ensured by means of moving the contents of used * physical eraseblocks with low erase counter to free physical eraseblocks @@ -43,34 +43,36 @@ * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick * an "optimal" physical eraseblock. For example, when it is known that the * physical eraseblock will be "put" soon because it contains short-term data, - * the WL unit may pick a free physical eraseblock with low erase counter, and - * so forth. + * the WL sub-system may pick a free physical eraseblock with low erase + * counter, and so forth. * - * If the WL unit fails to erase a physical eraseblock, it marks it as bad. + * If the WL sub-system fails to erase a physical eraseblock, it marks it as + * bad. * - * This unit is also responsible for scrubbing. If a bit-flip is detected in a - * physical eraseblock, it has to be moved. Technically this is the same as - * moving it for wear-leveling reasons. + * This sub-system is also responsible for scrubbing. If a bit-flip is detected + * in a physical eraseblock, it has to be moved. Technically this is the same + * as moving it for wear-leveling reasons. * - * As it was said, for the UBI unit all physical eraseblocks are either "free" - * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used - * eraseblocks are kept in a set of different RB-trees: @wl->used, + * As it was said, for the UBI sub-system all physical eraseblocks are either + * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while + * used eraseblocks are kept in a set of different RB-trees: @wl->used, * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. * * Note, in this implementation, we keep a small in-RAM object for each physical * eraseblock. This is surely not a scalable solution. But it appears to be good * enough for moderately large flashes and it is simple. In future, one may - * re-work this unit and make it more scalable. + * re-work this sub-system and make it more scalable. * - * At the moment this unit does not utilize the sequence number, which was - * introduced relatively recently. But it would be wise to do this because the - * sequence number of a logical eraseblock characterizes how old is it. For + * At the moment this sub-system does not utilize the sequence number, which + * was introduced relatively recently. But it would be wise to do this because + * the sequence number of a logical eraseblock characterizes how old is it. For * example, when we move a PEB with low erase counter, and we need to pick the * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we * pick target PEB with an average EC if our PEB is not very "old". This is a - * room for future re-works of the WL unit. + * room for future re-works of the WL sub-system. * - * FIXME: looks too complex, should be simplified (later). + * Note: the stuff with protection trees looks too complex and is difficult to + * understand. Should be fixed. */ #include <linux/slab.h> @@ -92,20 +94,21 @@ /* * Maximum difference between two erase counters. If this threshold is - * exceeded, the WL unit starts moving data from used physical eraseblocks with - * low erase counter to free physical eraseblocks with high erase counter. + * exceeded, the WL sub-system starts moving data from used physical + * eraseblocks with low erase counter to free physical eraseblocks with high + * erase counter. */ #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD /* - * When a physical eraseblock is moved, the WL unit has to pick the target + * When a physical eraseblock is moved, the WL sub-system has to pick the target * physical eraseblock to move to. The simplest way would be just to pick the * one with the highest erase counter. But in certain workloads this could lead * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a * situation when the picked physical eraseblock is constantly erased after the * data is written to it. So, we have a constant which limits the highest erase - * counter of the free physical eraseblock to pick. Namely, the WL unit does - * not pick eraseblocks with erase counter greater then the lowest erase + * counter of the free physical eraseblock to pick. Namely, the WL sub-system + * does not pick eraseblocks with erase counter greater then the lowest erase * counter plus %WL_FREE_MAX_DIFF. */ #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) @@ -123,11 +126,11 @@ * @abs_ec: the absolute erase counter value when the protection ends * @e: the wear-leveling entry of the physical eraseblock under protection * - * When the WL unit returns a physical eraseblock, the physical eraseblock is - * protected from being moved for some "time". For this reason, the physical - * eraseblock is not directly moved from the @wl->free tree to the @wl->used - * tree. There is one more tree in between where this physical eraseblock is - * temporarily stored (@wl->prot). + * When the WL sub-system returns a physical eraseblock, the physical + * eraseblock is protected from being moved for some "time". For this reason, + * the physical eraseblock is not directly moved from the @wl->free tree to the + * @wl->used tree. There is one more tree in between where this physical + * eraseblock is temporarily stored (@wl->prot). * * All this protection stuff is needed because: * o we don't want to move physical eraseblocks just after we have given them @@ -175,7 +178,6 @@ struct ubi_wl_prot_entry { * @list: a link in the list of pending works * @func: worker function * @priv: private data of the worker function - * * @e: physical eraseblock to erase * @torture: if the physical eraseblock has to be tortured * @@ -473,52 +475,47 @@ retry: } switch (dtype) { - case UBI_LONGTERM: - /* - * For long term data we pick a physical eraseblock - * with high erase counter. But the highest erase - * counter we can pick is bounded by the the lowest - * erase counter plus %WL_FREE_MAX_DIFF. - */ - e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - protect = LT_PROTECTION; - break; - case UBI_UNKNOWN: - /* - * For unknown data we pick a physical eraseblock with - * medium erase counter. But we by no means can pick a - * physical eraseblock with erase counter greater or - * equivalent than the lowest erase counter plus - * %WL_FREE_MAX_DIFF. - */ - first = rb_entry(rb_first(&ubi->free), - struct ubi_wl_entry, rb); - last = rb_entry(rb_last(&ubi->free), - struct ubi_wl_entry, rb); + case UBI_LONGTERM: + /* + * For long term data we pick a physical eraseblock with high + * erase counter. But the highest erase counter we can pick is + * bounded by the the lowest erase counter plus + * %WL_FREE_MAX_DIFF. + */ + e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + protect = LT_PROTECTION; + break; + case UBI_UNKNOWN: + /* + * For unknown data we pick a physical eraseblock with medium + * erase counter. But we by no means can pick a physical + * eraseblock with erase counter greater or equivalent than the + * lowest erase counter plus %WL_FREE_MAX_DIFF. + */ + first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); + last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb); - if (last->ec - first->ec < WL_FREE_MAX_DIFF) - e = rb_entry(ubi->free.rb_node, - struct ubi_wl_entry, rb); - else { - medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; - e = find_wl_entry(&ubi->free, medium_ec); - } - protect = U_PROTECTION; - break; - case UBI_SHORTTERM: - /* - * For short term data we pick a physical eraseblock - * with the lowest erase counter as we expect it will - * be erased soon. - */ - e = rb_entry(rb_first(&ubi->free), - struct ubi_wl_entry, rb); - protect = ST_PROTECTION; - break; - default: - protect = 0; - e = NULL; - BUG(); + if (last->ec - first->ec < WL_FREE_MAX_DIFF) + e = rb_entry(ubi->free.rb_node, + struct ubi_wl_entry, rb); + else { + medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; + e = find_wl_entry(&ubi->free, medium_ec); + } + protect = U_PROTECTION; + break; + case UBI_SHORTTERM: + /* + * For short term data we pick a physical eraseblock with the + * lowest erase counter as we expect it will be erased soon. + */ + e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); + protect = ST_PROTECTION; + break; + default: + protect = 0; + e = NULL; + BUG(); } /* @@ -582,7 +579,8 @@ found: * This function returns zero in case of success and a negative error code in * case of failure. */ -static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) +static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int torture) { int err; struct ubi_ec_hdr *ec_hdr; @@ -634,8 +632,7 @@ out_free: } /** - * check_protection_over - check if it is time to stop protecting some - * physical eraseblocks. + * check_protection_over - check if it is time to stop protecting some PEBs. * @ubi: UBI device description object * * This function is called after each erase operation, when the absolute erase @@ -871,6 +868,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, } ubi_free_vid_hdr(ubi, vid_hdr); + if (scrubbing && !protect) + ubi_msg("scrubbed PEB %d, data moved to PEB %d", + e1->pnum, e2->pnum); + spin_lock(&ubi->wl_lock); if (protect) prot_tree_add(ubi, e1, pe, protect); @@ -1054,8 +1055,8 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, spin_unlock(&ubi->wl_lock); /* - * One more erase operation has happened, take care about protected - * physical eraseblocks. + * One more erase operation has happened, take care about + * protected physical eraseblocks. */ check_protection_over(ubi); @@ -1136,7 +1137,7 @@ out_ro: } /** - * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. + * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. * @ubi: UBI device description object * @pnum: physical eraseblock to return * @torture: if this physical eraseblock has to be tortured @@ -1175,11 +1176,11 @@ retry: /* * User is putting the physical eraseblock which was selected * as the target the data is moved to. It may happen if the EBA - * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but - * the WL unit has not put the PEB to the "used" tree yet, but - * it is about to do this. So we just set a flag which will - * tell the WL worker that the PEB is not needed anymore and - * should be scheduled for erasure. + * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' + * but the WL sub-system has not put the PEB to the "used" tree + * yet, but it is about to do this. So we just set a flag which + * will tell the WL worker that the PEB is not needed anymore + * and should be scheduled for erasure. */ dbg_wl("PEB %d is the target of data moving", pnum); ubi_assert(!ubi->move_to_put); @@ -1229,7 +1230,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) { struct ubi_wl_entry *e; - ubi_msg("schedule PEB %d for scrubbing", pnum); + dbg_msg("schedule PEB %d for scrubbing", pnum); retry: spin_lock(&ubi->wl_lock); @@ -1368,7 +1369,7 @@ int ubi_thread(void *u) int err; if (kthread_should_stop()) - goto out; + break; if (try_to_freeze()) continue; @@ -1403,7 +1404,6 @@ int ubi_thread(void *u) cond_resched(); } -out: dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); return 0; } @@ -1426,8 +1426,7 @@ static void cancel_pending(struct ubi_device *ubi) } /** - * ubi_wl_init_scan - initialize the wear-leveling unit using scanning - * information. + * ubi_wl_init_scan - initialize the WL sub-system using scanning information. * @ubi: UBI device description object * @si: scanning information * @@ -1584,13 +1583,12 @@ static void protection_trees_destroy(struct ubi_device *ubi) } /** - * ubi_wl_close - close the wear-leveling unit. + * ubi_wl_close - close the wear-leveling sub-system. * @ubi: UBI device description object */ void ubi_wl_close(struct ubi_device *ubi) { - dbg_wl("close the UBI wear-leveling unit"); - + dbg_wl("close the WL sub-system"); cancel_pending(ubi); protection_trees_destroy(ubi); tree_destroy(&ubi->used); @@ -1602,8 +1600,7 @@ void ubi_wl_close(struct ubi_device *ubi) #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID /** - * paranoid_check_ec - make sure that the erase counter of a physical eraseblock - * is correct. + * paranoid_check_ec - make sure that the erase counter of a PEB is correct. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @ec: the erase counter to check @@ -1644,13 +1641,12 @@ out_free: } /** - * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present - * in a WL RB-tree. + * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. * @e: the wear-leveling entry to check * @root: the root of the tree * - * This function returns zero if @e is in the @root RB-tree and %1 if it - * is not. + * This function returns zero if @e is in the @root RB-tree and %1 if it is + * not. */ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 00527805e4f1..e5a6e2e84540 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -33,6 +33,7 @@ */ #include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -52,7 +53,9 @@ #include <asm/hvcall.h> #include <asm/atomic.h> #include <asm/vio.h> +#include <asm/iommu.h> #include <asm/uaccess.h> +#include <asm/firmware.h> #include <linux/seq_file.h> #include "ibmveth.h" @@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter); static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter); static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; + #ifdef CONFIG_PROC_FS #define IBMVETH_PROC_DIR "ibmveth" static struct proc_dir_entry *ibmveth_proc_dir; @@ -226,16 +231,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc u32 i; u32 count = pool->size - atomic_read(&pool->available); u32 buffers_added = 0; + struct sk_buff *skb; + unsigned int free_index, index; + u64 correlator; + unsigned long lpar_rc; + dma_addr_t dma_addr; mb(); for(i = 0; i < count; ++i) { - struct sk_buff *skb; - unsigned int free_index, index; - u64 correlator; union ibmveth_buf_desc desc; - unsigned long lpar_rc; - dma_addr_t dma_addr; skb = alloc_skb(pool->buff_size, GFP_ATOMIC); @@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_addr)) + goto failure; + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; pool->dma_addr[index] = dma_addr; pool->skbuff[index] = skb; @@ -267,20 +275,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); - if(lpar_rc != H_SUCCESS) { - pool->free_map[free_index] = index; - pool->skbuff[index] = NULL; - if (pool->consumer_index == 0) - pool->consumer_index = pool->size - 1; - else - pool->consumer_index--; - dma_unmap_single(&adapter->vdev->dev, - pool->dma_addr[index], pool->buff_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; - break; - } else { + if (lpar_rc != H_SUCCESS) + goto failure; + else { buffers_added++; adapter->replenish_add_buff_success++; } @@ -288,6 +285,24 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc mb(); atomic_add(buffers_added, &(pool->available)); + return; + +failure: + pool->free_map[free_index] = index; + pool->skbuff[index] = NULL; + if (pool->consumer_index == 0) + pool->consumer_index = pool->size - 1; + else + pool->consumer_index--; + if (!dma_mapping_error(dma_addr)) + dma_unmap_single(&adapter->vdev->dev, + pool->dma_addr[index], pool->buff_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + + mb(); + atomic_add(buffers_added, &(pool->available)); } /* replenish routine */ @@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) adapter->replenish_task_cycles++; - for(i = 0; i < IbmVethNumBufferPools; i++) + for (i = (IbmVethNumBufferPools - 1); i >= 0; i--) if(adapter->rx_buff_pool[i].active) ibmveth_replenish_buffer_pool(adapter, &adapter->rx_buff_pool[i]); @@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) if (adapter->rx_buff_pool[i].active) ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[i]); + + if (adapter->bounce_buffer != NULL) { + if (!dma_mapping_error(adapter->bounce_buffer_dma)) { + dma_unmap_single(&adapter->vdev->dev, + adapter->bounce_buffer_dma, + adapter->netdev->mtu + IBMVETH_BUFF_OH, + DMA_BIDIRECTIONAL); + adapter->bounce_buffer_dma = DMA_ERROR_CODE; + } + kfree(adapter->bounce_buffer); + adapter->bounce_buffer = NULL; + } } static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, @@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev) return rc; } + adapter->bounce_buffer = + kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL); + if (!adapter->bounce_buffer) { + ibmveth_error_printk("unable to allocate bounce buffer\n"); + ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); + return -ENOMEM; + } + adapter->bounce_buffer_dma = + dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, + netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); + if (dma_mapping_error(adapter->bounce_buffer_dma)) { + ibmveth_error_printk("unable to map bounce buffer\n"); + ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); + return -ENOMEM; + } + ibmveth_debug_printk("initial replenish cycle\n"); ibmveth_interrupt(netdev->irq, netdev); @@ -853,10 +898,12 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int tx_packets = 0; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; + int used_bounce = 0; + unsigned long data_dma_addr; desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len; - desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data, - skb->len, DMA_TO_DEVICE); + data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); if (skb->ip_summed == CHECKSUM_PARTIAL && ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) { @@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) buf[1] = 0; } - if (dma_mapping_error(desc.fields.address)) { - ibmveth_error_printk("tx: unable to map xmit buffer\n"); + if (dma_mapping_error(data_dma_addr)) { + if (!firmware_has_feature(FW_FEATURE_CMO)) + ibmveth_error_printk("tx: unable to map xmit buffer\n"); + skb_copy_from_linear_data(skb, adapter->bounce_buffer, + skb->len); + desc.fields.address = adapter->bounce_buffer_dma; tx_map_failed++; - tx_dropped++; - goto out; - } + used_bounce = 1; + } else + desc.fields.address = data_dma_addr; /* send the frame. Arbitrarily set retrycount to 1024 */ correlator = 0; @@ -904,8 +955,9 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) netdev->trans_start = jiffies; } - dma_unmap_single(&adapter->vdev->dev, desc.fields.address, - skb->len, DMA_TO_DEVICE); + if (!used_bounce) + dma_unmap_single(&adapter->vdev->dev, data_dma_addr, + skb->len, DMA_TO_DEVICE); out: spin_lock_irqsave(&adapter->stats_lock, flags); netdev->stats.tx_dropped += tx_dropped; @@ -1053,9 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) { struct ibmveth_adapter *adapter = dev->priv; + struct vio_dev *viodev = adapter->vdev; int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; - int reinit = 0; - int i, rc; + int i; if (new_mtu < IBMVETH_MAX_MTU) return -EINVAL; @@ -1067,23 +1119,34 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) if (i == IbmVethNumBufferPools) return -EINVAL; + /* Deactivate all the buffer pools so that the next loop can activate + only the buffer pools necessary to hold the new MTU */ + for (i = 0; i < IbmVethNumBufferPools; i++) + if (adapter->rx_buff_pool[i].active) { + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); + adapter->rx_buff_pool[i].active = 0; + } + /* Look for an active buffer pool that can hold the new MTU */ for(i = 0; i<IbmVethNumBufferPools; i++) { - if (!adapter->rx_buff_pool[i].active) { - adapter->rx_buff_pool[i].active = 1; - reinit = 1; - } + adapter->rx_buff_pool[i].active = 1; if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { - if (reinit && netif_running(adapter->netdev)) { + if (netif_running(adapter->netdev)) { adapter->pool_config = 1; ibmveth_close(adapter->netdev); adapter->pool_config = 0; dev->mtu = new_mtu; - if ((rc = ibmveth_open(adapter->netdev))) - return rc; - } else - dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); + return ibmveth_open(adapter->netdev); + } + dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); return 0; } } @@ -1098,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev) } #endif +/** + * ibmveth_get_desired_dma - Calculate IO memory desired by the driver + * + * @vdev: struct vio_dev for the device whose desired IO mem is to be returned + * + * Return value: + * Number of bytes of IO data the driver will need to perform well. + */ +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) +{ + struct net_device *netdev = dev_get_drvdata(&vdev->dev); + struct ibmveth_adapter *adapter; + unsigned long ret; + int i; + int rxqentries = 1; + + /* netdev inits at probe time along with the structures we need below*/ + if (netdev == NULL) + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT); + + adapter = netdev_priv(netdev); + + ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; + ret += IOMMU_PAGE_ALIGN(netdev->mtu); + + for (i = 0; i < IbmVethNumBufferPools; i++) { + /* add the size of the active receive buffers */ + if (adapter->rx_buff_pool[i].active) + ret += + adapter->rx_buff_pool[i].size * + IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + buff_size); + rxqentries += adapter->rx_buff_pool[i].size; + } + /* add the size of the receive queue entries */ + ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry)); + + return ret; +} + static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) { int rc, i; @@ -1242,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev) ibmveth_proc_unregister_adapter(adapter); free_netdev(netdev); + dev_set_drvdata(&dev->dev, NULL); + return 0; } @@ -1402,14 +1507,15 @@ const char * buf, size_t count) return -EPERM; } - pool->active = 0; if (netif_running(netdev)) { adapter->pool_config = 1; ibmveth_close(netdev); + pool->active = 0; adapter->pool_config = 0; if ((rc = ibmveth_open(netdev))) return rc; } + pool->active = 0; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) @@ -1485,6 +1591,7 @@ static struct vio_driver ibmveth_driver = { .id_table = ibmveth_device_table, .probe = ibmveth_probe, .remove = ibmveth_remove, + .get_desired_dma = ibmveth_get_desired_dma, .driver = { .name = ibmveth_driver_name, .owner = THIS_MODULE, diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h index 41f61cd18852..d28186948752 100644 --- a/drivers/net/ibmveth.h +++ b/drivers/net/ibmveth.h @@ -93,9 +93,12 @@ static inline long h_illan_attributes(unsigned long unit_address, plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac) #define IbmVethNumBufferPools 5 +#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */ #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */ #define IBMVETH_MAX_MTU 68 #define IBMVETH_MAX_POOL_COUNT 4096 +#define IBMVETH_BUFF_LIST_SIZE 4096 +#define IBMVETH_FILT_LIST_SIZE 4096 #define IBMVETH_MAX_BUF_SIZE (1024 * 128) static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; @@ -143,6 +146,8 @@ struct ibmveth_adapter { struct ibmveth_rx_q rx_queue; int pool_config; int rx_csum; + void *bounce_buffer; + dma_addr_t bounce_buffer_dma; /* adapter specific stats */ u64 replenish_task_cycles; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c28d7cb2035b..0196a0df9021 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -19,6 +19,7 @@ //#define DEBUG #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_net.h> @@ -54,9 +55,15 @@ struct virtnet_info struct tasklet_struct tasklet; bool free_in_tasklet; + /* I like... big packets and I cannot lie! */ + bool big_packets; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; + + /* Chain pages by the private ptr. */ + struct page *pages; }; static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) @@ -69,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); } +static void give_a_page(struct virtnet_info *vi, struct page *page) +{ + page->private = (unsigned long)vi->pages; + vi->pages = page; +} + +static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) +{ + struct page *p = vi->pages; + + if (p) + vi->pages = (struct page *)p->private; + else + p = alloc_page(gfp_mask); + return p; +} + static void skb_xmit_done(struct virtqueue *svq) { struct virtnet_info *vi = svq->vdev->priv; @@ -88,6 +112,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + int err; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -95,10 +120,23 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, goto drop; } len -= sizeof(struct virtio_net_hdr); - BUG_ON(len > MAX_PACKET_LEN); - skb_trim(skb, len); + if (len <= MAX_PACKET_LEN) { + unsigned int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page); + skb->data_len = 0; + skb_shinfo(skb)->nr_frags = 0; + } + + err = pskb_trim(skb, len); + if (err) { + pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); + dev->stats.rx_dropped++; + goto drop; + } + skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -160,7 +198,7 @@ static void try_fill_recv(struct virtnet_info *vi) { struct sk_buff *skb; struct scatterlist sg[2+MAX_SKB_FRAGS]; - int num, err; + int num, err, i; sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { @@ -170,6 +208,24 @@ static void try_fill_recv(struct virtnet_info *vi) skb_put(skb, MAX_PACKET_LEN); vnet_hdr_to_sg(sg, skb); + + if (vi->big_packets) { + for (i = 0; i < MAX_SKB_FRAGS; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + f->page = get_a_page(vi, GFP_ATOMIC); + if (!f->page) + break; + + f->page_offset = 0; + f->size = PAGE_SIZE; + + skb->data_len += PAGE_SIZE; + skb->len += PAGE_SIZE; + + skb_shinfo(skb)->nr_frags++; + } + } + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; skb_queue_head(&vi->recv, skb); @@ -335,16 +391,11 @@ again: free_old_xmit_skbs(vi); /* If we has a buffer left over from last time, send it now. */ - if (unlikely(vi->last_xmit_skb)) { - if (xmit_skb(vi, vi->last_xmit_skb) != 0) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); - skb = NULL; - goto stop_queue; - } - vi->last_xmit_skb = NULL; - } + if (unlikely(vi->last_xmit_skb) && + xmit_skb(vi, vi->last_xmit_skb) != 0) + goto stop_queue; + + vi->last_xmit_skb = NULL; /* Put new one in send queue and do transmit */ if (likely(skb)) { @@ -370,6 +421,11 @@ stop_queue: netif_start_queue(dev); goto again; } + if (skb) { + /* Drop this skb: we only queue one. */ + vi->dev->stats.tx_dropped++; + kfree_skb(skb); + } goto done; } @@ -408,6 +464,22 @@ static int virtnet_close(struct net_device *dev) return 0; } +static int virtnet_set_tx_csum(struct net_device *dev, u32 data) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct virtio_device *vdev = vi->vdev; + + if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) + return -ENOSYS; + + return ethtool_op_set_tx_hw_csum(dev, data); +} + +static struct ethtool_ops virtnet_ethtool_ops = { + .set_tx_csum = virtnet_set_tx_csum, + .set_sg = ethtool_op_set_sg, +}; + static int virtnet_probe(struct virtio_device *vdev) { int err; @@ -427,6 +499,7 @@ static int virtnet_probe(struct virtio_device *vdev) #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = virtnet_netpoll; #endif + SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops); SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ @@ -462,11 +535,18 @@ static int virtnet_probe(struct virtio_device *vdev) vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; + vi->pages = NULL; /* If they give us a callback when all buffers are done, we don't need * the timer. */ vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + vi->big_packets = true; + /* We expect two virtqueues, receive then send. */ vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { @@ -541,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev) vdev->config->del_vq(vi->svq); vdev->config->del_vq(vi->rvq); unregister_netdev(vi->dev); + + while (vi->pages) + __free_pages(get_a_page(vi, GFP_KERNEL), 0); + free_netdev(vi->dev); } @@ -553,7 +637,9 @@ static unsigned int features[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ + VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = { diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 3a7a11a75fb4..1d7ec3129349 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -4,7 +4,7 @@ config OF_DEVICE config OF_GPIO def_bool y - depends on OF && PPC_OF && HAVE_GPIO_LIB + depends on OF && PPC_OF && GPIOLIB help OpenFirmware GPIO accessors diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c index 5c015d310d4a..344e1b03dd8b 100644 --- a/drivers/of/of_i2c.c +++ b/drivers/of/of_i2c.c @@ -91,8 +91,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap, } info.irq = irq_of_parse_and_map(node, 0); - if (info.irq == NO_IRQ) - info.irq = -1; if (of_find_i2c_driver(node, &info) < 0) { irq_dispose_mapping(info.irq); diff --git a/drivers/parport/parport_ax88796.c b/drivers/parport/parport_ax88796.c index 4ec220b2eae7..6938d2e9f18f 100644 --- a/drivers/parport/parport_ax88796.c +++ b/drivers/parport/parport_ax88796.c @@ -406,6 +406,8 @@ static int parport_ax88796_resume(struct platform_device *dev) #define parport_ax88796_resume NULL #endif +MODULE_ALIAS("platform:ax88796-pp"); + static struct platform_driver axdrv = { .driver = { .name = "ax88796-pp", diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c index 71be36f18709..308ddb201b66 100644 --- a/drivers/power/ds2760_battery.c +++ b/drivers/power/ds2760_battery.c @@ -433,6 +433,8 @@ static int ds2760_battery_resume(struct platform_device *pdev) #endif /* CONFIG_PM */ +MODULE_ALIAS("platform:ds2760-battery"); + static struct platform_driver ds2760_battery_driver = { .driver = { .name = "ds2760-battery", diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c index 82810b7bff9c..0471ec743ab9 100644 --- a/drivers/power/pda_power.c +++ b/drivers/power/pda_power.c @@ -362,6 +362,8 @@ static int pda_power_resume(struct platform_device *pdev) #define pda_power_resume NULL #endif /* CONFIG_PM */ +MODULE_ALIAS("platform:pda-power"); + static struct platform_driver pda_power_pdrv = { .driver = { .name = "pda-power", diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 5ab34340919b..79954bd6bfa5 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -15,6 +15,7 @@ #include <linux/err.h> #include <linux/virtio.h> #include <linux/virtio_config.h> +#include <linux/virtio_console.h> #include <linux/interrupt.h> #include <linux/virtio_ring.h> #include <linux/pfn.h> @@ -87,16 +88,20 @@ static u32 kvm_get_features(struct virtio_device *vdev) return features; } -static void kvm_set_features(struct virtio_device *vdev, u32 features) +static void kvm_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; /* Second half of bitmap is features we accept. */ u8 *out_features = kvm_vq_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -222,7 +227,7 @@ static void kvm_del_vq(struct virtqueue *vq) */ static struct virtio_config_ops kvm_vq_configspace_ops = { .get_features = kvm_get_features, - .set_features = kvm_set_features, + .finalize_features = kvm_finalize_features, .get = kvm_get, .set = kvm_set, .get_status = kvm_get_status, @@ -333,6 +338,25 @@ static int __init kvm_devices_init(void) return 0; } +/* code for early console output with virtio_console */ +static __init int early_put_chars(u32 vtermno, const char *buf, int count) +{ + char scratch[17]; + unsigned int len = count; + + if (len > sizeof(scratch) - 1) + len = sizeof(scratch) - 1; + scratch[len] = '\0'; + memcpy(scratch, buf, len); + kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch)); + return len; +} + +void s390_virtio_console_init(void) +{ + virtio_cons_early_init(early_put_chars); +} + /* * We do this after core stuff, but before the drivers. */ diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index eb702b96d57c..c4a7c06793c5 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3819,6 +3819,20 @@ static int ibmvfc_remove(struct vio_dev *vdev) return 0; } +/** + * ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver + * @vdev: vio device struct + * + * Return value: + * Number of bytes the driver will need to DMA map at the same time in + * order to perform well. + */ +static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev) +{ + unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu); + return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun); +} + static struct vio_device_id ibmvfc_device_table[] __devinitdata = { {"fcp", "IBM,vfc-client"}, { "", "" } @@ -3829,6 +3843,7 @@ static struct vio_driver ibmvfc_driver = { .id_table = ibmvfc_device_table, .probe = ibmvfc_probe, .remove = ibmvfc_remove, + .get_desired_dma = ibmvfc_get_desired_dma, .driver = { .name = IBMVFC_NAME, .owner = THIS_MODULE, diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 5d23368a1bce..20000ec79b04 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -72,6 +72,7 @@ #include <linux/delay.h> #include <asm/firmware.h> #include <asm/vio.h> +#include <asm/firmware.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_host.h> @@ -426,8 +427,10 @@ static int map_sg_data(struct scsi_cmnd *cmd, SG_ALL * sizeof(struct srp_direct_buf), &evt_struct->ext_list_token, 0); if (!evt_struct->ext_list) { - sdev_printk(KERN_ERR, cmd->device, - "Can't allocate memory for indirect table\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + sdev_printk(KERN_ERR, cmd->device, + "Can't allocate memory " + "for indirect table\n"); return 0; } } @@ -743,7 +746,9 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd, srp_cmd->lun = ((u64) lun) << 48; if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { - sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + sdev_printk(KERN_ERR, cmnd->device, + "couldn't convert cmd to srp_cmd\n"); free_event_struct(&hostdata->pool, evt_struct); return SCSI_MLQUEUE_HOST_BUSY; } @@ -855,7 +860,10 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata) DMA_BIDIRECTIONAL); if (dma_mapping_error(req->buffer)) { - dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(hostdata->dev, + "Unable to map request_buffer for " + "adapter_info!\n"); free_event_struct(&hostdata->pool, evt_struct); return; } @@ -1400,7 +1408,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, DMA_BIDIRECTIONAL); if (dma_mapping_error(host_config->buffer)) { - dev_err(hostdata->dev, "dma_mapping error getting host config\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(hostdata->dev, + "dma_mapping error getting host config\n"); free_event_struct(&hostdata->pool, evt_struct); return -1; } @@ -1604,7 +1614,7 @@ static struct scsi_host_template driver_template = { .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler, .slave_configure = ibmvscsi_slave_configure, .change_queue_depth = ibmvscsi_change_queue_depth, - .cmd_per_lun = 16, + .cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT, .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT, .this_id = -1, .sg_tablesize = SG_ALL, @@ -1613,6 +1623,26 @@ static struct scsi_host_template driver_template = { }; /** + * ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver + * + * @vdev: struct vio_dev for the device whose desired IO mem is to be returned + * + * Return value: + * Number of bytes of IO data the driver will need to perform well. + */ +static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev) +{ + /* iu_storage data allocated in initialize_event_pool */ + unsigned long desired_io = max_requests * sizeof(union viosrp_iu); + + /* add io space for sg data */ + desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT * + IBMVSCSI_CMDS_PER_LUN_DEFAULT); + + return desired_io; +} + +/** * Called by bus code for each adapter */ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) @@ -1641,7 +1671,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) hostdata->host = host; hostdata->dev = dev; atomic_set(&hostdata->request_limit, -1); - hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */ + hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT; rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests); if (rc != 0 && rc != H_RESOURCE) { @@ -1735,6 +1765,7 @@ static struct vio_driver ibmvscsi_driver = { .id_table = ibmvscsi_device_table, .probe = ibmvscsi_probe, .remove = ibmvscsi_remove, + .get_desired_dma = ibmvscsi_get_desired_dma, .driver = { .name = "ibmvscsi", .owner = THIS_MODULE, diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h index 46e850e302c7..2d4339d5e16e 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.h +++ b/drivers/scsi/ibmvscsi/ibmvscsi.h @@ -45,6 +45,8 @@ struct Scsi_Host; #define MAX_INDIRECT_BUFS 10 #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100 +#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16 +#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */ #define IBMVSCSI_MAX_CMDS_PER_LUN 64 /* ------------------------------------------------------------ diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c index 49cd9793404f..ec7aeb502d15 100644 --- a/drivers/telephony/ixj.c +++ b/drivers/telephony/ixj.c @@ -6095,15 +6095,15 @@ static int capabilities_check(IXJ *j, struct phone_capability *pcreq) return retval; } -static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd, unsigned long arg) +static long do_ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg) { IXJ_TONE ti; IXJ_FILTER jf; IXJ_FILTER_RAW jfr; void __user *argp = (void __user *)arg; - - unsigned int raise, mant; + struct inode *inode = file_p->f_path.dentry->d_inode; unsigned int minor = iminor(inode); + unsigned int raise, mant; int board = NUM(inode); IXJ *j = get_ixj(NUM(inode)); @@ -6661,6 +6661,15 @@ static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd, return retval; } +static long ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg) +{ + long ret; + lock_kernel(); + ret = do_ixj_ioctl(file_p, cmd, arg); + unlock_kernel(); + return ret; +} + static int ixj_fasync(int fd, struct file *file_p, int mode) { IXJ *j = get_ixj(NUM(file_p->f_path.dentry->d_inode)); @@ -6674,7 +6683,7 @@ static const struct file_operations ixj_fops = .read = ixj_enhanced_read, .write = ixj_enhanced_write, .poll = ixj_poll, - .ioctl = ixj_ioctl, + .unlocked_ioctl = ixj_ioctl, .release = ixj_release, .fasync = ixj_fasync }; diff --git a/drivers/usb/gadget/at91_udc.h b/drivers/usb/gadget/at91_udc.h index a973f2a50fb9..c65d62295890 100644 --- a/drivers/usb/gadget/at91_udc.h +++ b/drivers/usb/gadget/at91_udc.h @@ -171,7 +171,7 @@ struct at91_request { #endif #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) #define DBG(stuff...) pr_debug("udc: " stuff) diff --git a/drivers/usb/gadget/cdc2.c b/drivers/usb/gadget/cdc2.c index d490d0289507..a39a4b940c33 100644 --- a/drivers/usb/gadget/cdc2.c +++ b/drivers/usb/gadget/cdc2.c @@ -170,7 +170,7 @@ static int __init cdc_bind(struct usb_composite_dev *cdev) * but if the controller isn't recognized at all then * that assumption is a bit more likely to be wrong. */ - WARN(cdev, "controller '%s' not recognized; trying %s\n", + WARNING(cdev, "controller '%s' not recognized; trying %s\n", gadget->name, cdc_config_driver.label); device_desc.bcdDevice = diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index d7aaaa29b1e1..bcac2e68660d 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -293,7 +293,7 @@ static int __init eth_bind(struct usb_composite_dev *cdev) * but if the controller isn't recognized at all then * that assumption is a bit more likely to be wrong. */ - WARN(cdev, "controller '%s' not recognized; trying %s\n", + WARNING(cdev, "controller '%s' not recognized; trying %s\n", gadget->name, eth_config_driver.label); device_desc.bcdDevice = diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 15c24edbb61a..ea2c31d18080 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -308,7 +308,7 @@ MODULE_LICENSE("Dual BSD/GPL"); dev_vdbg(&(d)->gadget->dev , fmt , ## args) #define ERROR(d, fmt, args...) \ dev_err(&(d)->gadget->dev , fmt , ## args) -#define WARN(d, fmt, args...) \ +#define WARNING(d, fmt, args...) \ dev_warn(&(d)->gadget->dev , fmt , ## args) #define INFO(d, fmt, args...) \ dev_info(&(d)->gadget->dev , fmt , ## args) @@ -1091,7 +1091,7 @@ static int ep0_queue(struct fsg_dev *fsg) if (rc != 0 && rc != -ESHUTDOWN) { /* We can't do much more than wait for a reset */ - WARN(fsg, "error in submission: %s --> %d\n", + WARNING(fsg, "error in submission: %s --> %d\n", fsg->ep0->name, rc); } return rc; @@ -1227,7 +1227,7 @@ static void received_cbi_adsc(struct fsg_dev *fsg, struct fsg_buffhd *bh) /* Save the command for later */ if (fsg->cbbuf_cmnd_size) - WARN(fsg, "CB[I] overwriting previous command\n"); + WARNING(fsg, "CB[I] overwriting previous command\n"); fsg->cbbuf_cmnd_size = req->actual; memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size); @@ -1506,7 +1506,7 @@ static void start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, * submissions if DMA is enabled. */ if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP && req->length == 0)) - WARN(fsg, "error in submission: %s --> %d\n", + WARNING(fsg, "error in submission: %s --> %d\n", ep->name, rc); } } @@ -2294,7 +2294,7 @@ static int halt_bulk_in_endpoint(struct fsg_dev *fsg) VDBG(fsg, "delayed bulk-in endpoint halt\n"); while (rc != 0) { if (rc != -EAGAIN) { - WARN(fsg, "usb_ep_set_halt -> %d\n", rc); + WARNING(fsg, "usb_ep_set_halt -> %d\n", rc); rc = 0; break; } @@ -2317,7 +2317,7 @@ static int wedge_bulk_in_endpoint(struct fsg_dev *fsg) VDBG(fsg, "delayed bulk-in endpoint wedge\n"); while (rc != 0) { if (rc != -EAGAIN) { - WARN(fsg, "usb_ep_set_wedge -> %d\n", rc); + WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc); rc = 0; break; } @@ -3755,7 +3755,7 @@ static int __init check_parameters(struct fsg_dev *fsg) if (gcnum >= 0) mod_data.release = 0x0300 + gcnum; else { - WARN(fsg, "controller '%s' not recognized\n", + WARNING(fsg, "controller '%s' not recognized\n", fsg->gadget->name); mod_data.release = 0x0399; } diff --git a/drivers/usb/gadget/fsl_usb2_udc.c b/drivers/usb/gadget/fsl_usb2_udc.c index 1695382f30fe..1cfccf102a2d 100644 --- a/drivers/usb/gadget/fsl_usb2_udc.c +++ b/drivers/usb/gadget/fsl_usb2_udc.c @@ -1538,7 +1538,7 @@ static void dtd_complete_irq(struct fsl_udc *udc) /* If the ep is configured */ if (curr_ep->name == NULL) { - WARN("Invalid EP?"); + WARNING("Invalid EP?"); continue; } diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h index 98b1483ef6a5..6131752a38bc 100644 --- a/drivers/usb/gadget/fsl_usb2_udc.h +++ b/drivers/usb/gadget/fsl_usb2_udc.h @@ -552,7 +552,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length) #endif #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c index 7f4d4828e3aa..ea8651e3da1a 100644 --- a/drivers/usb/gadget/gmidi.c +++ b/drivers/usb/gadget/gmidi.c @@ -138,8 +138,6 @@ static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req); dev_vdbg(&(d)->gadget->dev , fmt , ## args) #define ERROR(d, fmt, args...) \ dev_err(&(d)->gadget->dev , fmt , ## args) -#define WARN(d, fmt, args...) \ - dev_warn(&(d)->gadget->dev , fmt , ## args) #define INFO(d, fmt, args...) \ dev_info(&(d)->gadget->dev , fmt , ## args) diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index 48f1c63b7013..60aa04847b18 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1768,7 +1768,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) * usb_gadget_driver_{register,unregister}() must change. */ if (the_controller) { - WARN(dev, "ignoring %s\n", pci_name(pdev)); + WARNING(dev, "ignoring %s\n", pci_name(pdev)); return -EBUSY; } if (!pdev->irq) { diff --git a/drivers/usb/gadget/goku_udc.h b/drivers/usb/gadget/goku_udc.h index bc4eb1e0b507..566cb2319056 100644 --- a/drivers/usb/gadget/goku_udc.h +++ b/drivers/usb/gadget/goku_udc.h @@ -285,7 +285,7 @@ struct goku_udc { #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev,fmt,args...) \ +#define WARNING(dev,fmt,args...) \ xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 04692d59fc1c..f4585d3e90d7 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -262,8 +262,6 @@ static const char *CHIP; #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev,fmt,args...) \ - xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index b67ab677af72..5cfb5ebf3881 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -1007,7 +1007,7 @@ static void scan_dma_completions (struct net2280_ep *ep) * 0122, and 0124; not all cases trigger the warning. */ if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) { - WARN (ep->dev, "%s lost packet sync!\n", + WARNING (ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else if ((tmp = readl (&ep->regs->ep_avail)) != 0) { diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h index 1f2af398a9a4..81a71dbdc2c6 100644 --- a/drivers/usb/gadget/net2280.h +++ b/drivers/usb/gadget/net2280.h @@ -272,7 +272,7 @@ static inline void net2280_led_shutdown (struct net2280 *dev) #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev,fmt,args...) \ +#define WARNING(dev,fmt,args...) \ xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 4b79a8509e84..395bd1844482 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -1120,7 +1120,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value) status = -EINVAL; else if (value) { if (ep->udc->ep0_set_config) { - WARN("error changing config?\n"); + WARNING("error changing config?\n"); omap_writew(UDC_CLR_CFG, UDC_SYSCON2); } omap_writew(UDC_STALL_CMD, UDC_SYSCON2); @@ -1764,7 +1764,7 @@ do_stall: u.r.bRequestType, u.r.bRequest, status); if (udc->ep0_set_config) { if (udc->ep0_reset_config) - WARN("error resetting config?\n"); + WARNING("error resetting config?\n"); else omap_writew(UDC_CLR_CFG, UDC_SYSCON2); } @@ -3076,7 +3076,7 @@ static int omap_udc_suspend(struct platform_device *dev, pm_message_t message) * which would prevent entry to deep sleep... */ if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) { - WARN("session active; suspend requires disconnect\n"); + WARNING("session active; suspend requires disconnect\n"); omap_pullup(&udc->gadget, 0); } diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h index 8522bbb12278..29edc51b6b22 100644 --- a/drivers/usb/gadget/omap_udc.h +++ b/drivers/usb/gadget/omap_udc.h @@ -188,7 +188,7 @@ struct omap_udc { #endif #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) #define DBG(stuff...) pr_debug("udc: " stuff) diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c index 49cd9e145a9b..e0090085b78e 100644 --- a/drivers/usb/gadget/printer.c +++ b/drivers/usb/gadget/printer.c @@ -179,7 +179,7 @@ module_param(qlen, uint, S_IRUGO|S_IWUSR); #define ERROR(dev, fmt, args...) \ xprintk(dev, KERN_ERR, fmt, ## args) -#define WARN(dev, fmt, args...) \ +#define WARNING(dev, fmt, args...) \ xprintk(dev, KERN_WARNING, fmt, ## args) #define INFO(dev, fmt, args...) \ xprintk(dev, KERN_INFO, fmt, ## args) diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c index 8fb0066609bb..7e6725d89976 100644 --- a/drivers/usb/gadget/pxa25x_udc.c +++ b/drivers/usb/gadget/pxa25x_udc.c @@ -342,7 +342,7 @@ pxa25x_ep_free_request (struct usb_ep *_ep, struct usb_request *_req) struct pxa25x_request *req; req = container_of (_req, struct pxa25x_request, req); - WARN_ON (!list_empty (&req->queue)); + WARN_ON(!list_empty (&req->queue)); kfree(req); } @@ -1556,7 +1556,7 @@ config_change: * tell us about config change events, * so later ones may fail... */ - WARN("config change %02x fail %d?\n", + WARNING("config change %02x fail %d?\n", u.r.bRequest, i); return; /* TODO experiment: if has_cfr, @@ -2330,7 +2330,7 @@ static int pxa25x_udc_suspend(struct platform_device *dev, pm_message_t state) unsigned long flags; if (!udc->mach->gpio_pullup && !udc->mach->udc_command) - WARN("USB host won't detect disconnect!\n"); + WARNING("USB host won't detect disconnect!\n"); udc->suspended = 1; local_irq_save(flags); diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h index 4d11ece7c95f..c8a13215e02c 100644 --- a/drivers/usb/gadget/pxa25x_udc.h +++ b/drivers/usb/gadget/pxa25x_udc.h @@ -259,7 +259,7 @@ dump_state(struct pxa25x_udc *dev) #define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0) #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 5458f43a8668..3791e6271903 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -116,7 +116,6 @@ static inline int qlen(struct usb_gadget *gadget) #undef DBG #undef VDBG #undef ERROR -#undef WARN #undef INFO #define xprintk(d, level, fmt, args...) \ @@ -140,8 +139,6 @@ static inline int qlen(struct usb_gadget *gadget) #define ERROR(dev, fmt, args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev, fmt, args...) \ - xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev, fmt, args...) \ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 31178e10cbbe..ce1ca0ba0515 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -882,7 +882,7 @@ static void isp116x_endpoint_disable(struct usb_hcd *hcd, for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++) msleep(3); if (!list_empty(&hep->urb_list)) - WARN("ep %p not empty?\n", ep); + WARNING("ep %p not empty?\n", ep); kfree(ep); hep->hcpriv = NULL; diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h index 595b90a99848..aa211bafcff9 100644 --- a/drivers/usb/host/isp116x.h +++ b/drivers/usb/host/isp116x.h @@ -338,7 +338,7 @@ struct isp116x_ep { #endif #define ERR(stuff...) printk(KERN_ERR "116x: " stuff) -#define WARN(stuff...) printk(KERN_WARNING "116x: " stuff) +#define WARNING(stuff...) printk(KERN_WARNING "116x: " stuff) #define INFO(stuff...) printk(KERN_INFO "116x: " stuff) /* ------------------------------------------------- */ diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index 340d72da554a..8a74bbb57d08 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1026,7 +1026,7 @@ sl811h_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep) if (!list_empty(&hep->urb_list)) msleep(3); if (!list_empty(&hep->urb_list)) - WARN("ep %p not empty?\n", ep); + WARNING("ep %p not empty?\n", ep); kfree(ep); hep->hcpriv = NULL; diff --git a/drivers/usb/host/sl811.h b/drivers/usb/host/sl811.h index 7690d98e42a7..b6b8c1f233dd 100644 --- a/drivers/usb/host/sl811.h +++ b/drivers/usb/host/sl811.h @@ -261,6 +261,6 @@ sl811_read_buf(struct sl811 *sl811, int addr, void *buf, size_t count) #endif #define ERR(stuff...) printk(KERN_ERR "sl811: " stuff) -#define WARN(stuff...) printk(KERN_WARNING "sl811: " stuff) +#define WARNING(stuff...) printk(KERN_WARNING "sl811: " stuff) #define INFO(stuff...) printk(KERN_INFO "sl811: " stuff) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 054dedd28127..b358c4e1cf21 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -81,7 +81,7 @@ static struct usb_device *testdev_to_usbdev (struct usbtest_dev *test) #define ERROR(tdev, fmt, args...) \ dev_err(&(tdev)->intf->dev , fmt , ## args) -#define WARN(tdev, fmt, args...) \ +#define WARNING(tdev, fmt, args...) \ dev_warn(&(tdev)->intf->dev , fmt , ## args) /*-------------------------------------------------------------------------*/ @@ -1946,7 +1946,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id) status = get_endpoints (dev, intf); if (status < 0) { - WARN(dev, "couldn't get endpoints, %d\n", + WARNING(dev, "couldn't get endpoints, %d\n", status); return status; } diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7084e7e146c0..5b78fd0aff0a 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -71,13 +71,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env) dev->id.device, dev->id.vendor); } -static struct bus_type virtio_bus = { - .name = "virtio", - .match = virtio_dev_match, - .dev_attrs = virtio_dev_attrs, - .uevent = virtio_uevent, -}; - static void add_status(struct virtio_device *dev, unsigned status) { dev->config->set_status(dev, dev->config->get_status(dev) | status); @@ -120,12 +113,16 @@ static int virtio_dev_probe(struct device *_d) set_bit(f, dev->features); } + /* Transport features always preserved to pass to finalize_features. */ + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) + if (device_features & (1 << i)) + set_bit(i, dev->features); + err = drv->probe(dev); if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); else { - /* They should never have set feature bits beyond 32 */ - dev->config->set_features(dev, dev->features[0]); + dev->config->finalize_features(dev); add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } return err; @@ -147,13 +144,20 @@ static int virtio_dev_remove(struct device *_d) return 0; } +static struct bus_type virtio_bus = { + .name = "virtio", + .match = virtio_dev_match, + .dev_attrs = virtio_dev_attrs, + .uevent = virtio_uevent, + .probe = virtio_dev_probe, + .remove = virtio_dev_remove, +}; + int register_virtio_driver(struct virtio_driver *driver) { /* Catch this early. */ BUG_ON(driver->feature_table_size && !driver->feature_table); driver->driver.bus = &virtio_bus; - driver->driver.probe = virtio_dev_probe; - driver->driver.remove = virtio_dev_remove; return driver_register(&driver->driver); } EXPORT_SYMBOL_GPL(register_virtio_driver); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index eae7236310e4..c7dc37c7cce9 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -94,12 +94,17 @@ static u32 vp_get_features(struct virtio_device *vdev) return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); } -/* virtio config->set_features() implementation */ -static void vp_set_features(struct virtio_device *vdev, u32 features) +/* virtio config->finalize_features() implementation */ +static void vp_finalize_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + /* We only support 32 feature bits. */ + BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); + iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); } /* virtio config->get() implementation */ @@ -297,7 +302,7 @@ static struct virtio_config_ops virtio_pci_config_ops = { .find_vq = vp_find_vq, .del_vq = vp_del_vq, .get_features = vp_get_features, - .set_features = vp_set_features, + .finalize_features = vp_finalize_features, }; /* the PCI probing function */ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 72bf8bc09014..6eb5303fed11 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -18,6 +18,7 @@ */ #include <linux/virtio.h> #include <linux/virtio_ring.h> +#include <linux/virtio_config.h> #include <linux/device.h> #ifdef DEBUG @@ -87,8 +88,11 @@ static int vring_add_buf(struct virtqueue *_vq, if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); - /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */ - vq->notify(&vq->vq); + /* FIXME: for historical reasons, we force a notify here if + * there are outgoing parts to the buffer. Presumably the + * host should service the ring ASAP. */ + if (out) + vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } @@ -320,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(vring_del_virtqueue); +/* Manipulates transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev) +{ + unsigned int i; + + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { + switch (i) { + default: + /* We don't understand this bit. */ + clear_bit(i, vdev->features); + } + } +} +EXPORT_SYMBOL_GPL(vring_transport_features); + MODULE_LICENSE("GPL"); diff --git a/fs/Kconfig b/fs/Kconfig index 37db79a2ff95..97e3bdedb1e6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -902,65 +902,7 @@ endif # BLOCK menu "Pseudo filesystems" -config PROC_FS - bool "/proc file system support" if EMBEDDED - default y - help - This is a virtual file system providing information about the status - of the system. "Virtual" means that it doesn't take up any space on - your hard disk: the files are created on the fly by the kernel when - you try to access them. Also, you cannot read the files with older - version of the program less: you need to use more or cat. - - It's totally cool; for example, "cat /proc/interrupts" gives - information about what the different IRQs are used for at the moment - (there is a small number of Interrupt ReQuest lines in your computer - that are used by the attached devices to gain the CPU's attention -- - often a source of trouble if two devices are mistakenly configured - to use the same IRQ). The program procinfo to display some - information about your system gathered from the /proc file system. - - Before you can use the /proc file system, it has to be mounted, - meaning it has to be given a location in the directory hierarchy. - That location should be /proc. A command such as "mount -t proc proc - /proc" or the equivalent line in /etc/fstab does the job. - - The /proc file system is explained in the file - <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage - ("man 5 proc"). - - This option will enlarge your kernel by about 67 KB. Several - programs depend on this, so everyone should say Y here. - -config PROC_KCORE - bool "/proc/kcore support" if !ARM - depends on PROC_FS && MMU - -config PROC_VMCORE - bool "/proc/vmcore support (EXPERIMENTAL)" - depends on PROC_FS && CRASH_DUMP - default y - help - Exports the dump image of crashed kernel in ELF format. - -config PROC_SYSCTL - bool "Sysctl support (/proc/sys)" if EMBEDDED - depends on PROC_FS - select SYSCTL - default y - ---help--- - The sysctl interface provides a means of dynamically changing - certain kernel parameters and variables on the fly without requiring - a recompile of the kernel or reboot of the system. The primary - interface is through /proc/sys. If you say Y here a tree of - modifiable sysctl entries will be generated beneath the - /proc/sys directory. They are explained in the files - in <file:Documentation/sysctl/>. Note that enabling this - option will enlarge the kernel by at least 8 KB. - - As it is generally a good thing, you should say Y here unless - building a kernel for install/rescue disks or your system is very - limited in memory. +source "fs/proc/Kconfig" config SYSFS bool "sysfs file system support" if EMBEDDED @@ -2093,20 +2035,6 @@ config CODA_FS To compile the coda client support as a module, choose M here: the module will be called coda. -config CODA_FS_OLD_API - bool "Use 96-bit Coda file identifiers" - depends on CODA_FS - help - A new kernel-userspace API had to be introduced for Coda v6.0 - to support larger 128-bit file identifiers as needed by the - new realms implementation. - - However this new API is not backward compatible with older - clients. If you really need to run the old Coda userspace - cache manager then say Y. - - For most cases you probably want to say N. - config AFS_FS tristate "Andrew File System support (AFS) (EXPERIMENTAL)" depends on INET && EXPERIMENTAL @@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); - tsk->flags |= PF_BORROWED_MM; active_mm = tsk->active_mm; atomic_inc(&mm->mm_count); tsk->mm = mm; @@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); - tsk->flags &= ~PF_BORROWED_MM; tsk->mm = NULL; /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 639d2d8b5710..3b6ff854d983 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss) #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) #endif +#ifndef ELF_BASE_PLATFORM +/* + * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture. + * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value + * will be copied to the user stack in the same manner as AT_PLATFORM. + */ +#define ELF_BASE_PLATFORM NULL +#endif + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, elf_addr_t __user *envp; elf_addr_t __user *sp; elf_addr_t __user *u_platform; + elf_addr_t __user *u_base_platform; const char *k_platform = ELF_PLATFORM; + const char *k_base_platform = ELF_BASE_PLATFORM; int items; elf_addr_t *elf_info; int ei_index = 0; @@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; } + /* + * If this architecture has a "base" platform capability + * string, copy it to userspace. + */ + u_base_platform = NULL; + if (k_base_platform) { + size_t len = strlen(k_base_platform) + 1; + + u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); + if (__copy_to_user(u_base_platform, k_base_platform, len)) + return -EFAULT; + } + /* Create the ELF interpreter info */ elf_info = (elf_addr_t *)current->mm->saved_auxv; /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ @@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform); } + if (k_base_platform) { + NEW_AUX_ENT(AT_BASE_PLATFORM, + (elf_addr_t)(unsigned long)u_base_platform); + } if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); } @@ -1478,7 +1506,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, const struct user_regset_view *view = task_user_regset_view(dump_task); struct elf_thread_core_info *t; struct elf_prpsinfo *psinfo; - struct task_struct *g, *p; + struct core_thread *ct; unsigned int i; info->size = 0; @@ -1517,31 +1545,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, /* * Allocate a structure for each thread. */ - rcu_read_lock(); - do_each_thread(g, p) - if (p->mm == dump_task->mm) { - t = kzalloc(offsetof(struct elf_thread_core_info, - notes[info->thread_notes]), - GFP_ATOMIC); - if (unlikely(!t)) { - rcu_read_unlock(); - return 0; - } - t->task = p; - if (p == dump_task || !info->thread) { - t->next = info->thread; - info->thread = t; - } else { - /* - * Make sure to keep the original task at - * the head of the list. - */ - t->next = info->thread->next; - info->thread->next = t; - } + for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) { + t = kzalloc(offsetof(struct elf_thread_core_info, + notes[info->thread_notes]), + GFP_KERNEL); + if (unlikely(!t)) + return 0; + + t->task = ct->task; + if (ct->task == dump_task || !info->thread) { + t->next = info->thread; + info->thread = t; + } else { + /* + * Make sure to keep the original task at + * the head of the list. + */ + t->next = info->thread->next; + info->thread->next = t; } - while_each_thread(g, p); - rcu_read_unlock(); + } /* * Now fill in each thread's information. @@ -1688,7 +1711,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, { #define NUM_NOTES 6 struct list_head *t; - struct task_struct *g, *p; info->notes = NULL; info->prstatus = NULL; @@ -1720,20 +1742,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread_status_size = 0; if (signr) { + struct core_thread *ct; struct elf_thread_status *ets; - rcu_read_lock(); - do_each_thread(g, p) - if (current->mm == p->mm && current != p) { - ets = kzalloc(sizeof(*ets), GFP_ATOMIC); - if (!ets) { - rcu_read_unlock(); - return 0; - } - ets->thread = p; - list_add(&ets->list, &info->thread_list); - } - while_each_thread(g, p); - rcu_read_unlock(); + + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + ets = kzalloc(sizeof(*ets), GFP_KERNEL); + if (!ets) + return 0; + + ets->thread = ct->task; + list_add(&ets->list, &info->thread_list); + } + list_for_each(t, &info->thread_list) { int sz; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index d051a32e6270..1b59b1edf26d 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1573,7 +1573,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, struct memelfnote *notes = NULL; struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ - struct task_struct *g, *p; LIST_HEAD(thread_list); struct list_head *t; elf_fpregset_t *fpu = NULL; @@ -1622,20 +1621,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, #endif if (signr) { + struct core_thread *ct; struct elf_thread_status *tmp; - rcu_read_lock(); - do_each_thread(g,p) - if (current->mm == p->mm && current != p) { - tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); - if (!tmp) { - rcu_read_unlock(); - goto cleanup; - } - tmp->thread = p; - list_add(&tmp->list, &thread_list); - } - while_each_thread(g,p); - rcu_read_unlock(); + + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + goto cleanup; + + tmp->thread = ct->task; + list_add(&tmp->list, &thread_list); + } + list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index e1c854890f94..bf4a3fd3c8e3 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -28,11 +28,9 @@ int coda_fake_statfs; char * coda_f2s(struct CodaFid *f) { static char s[60]; -#ifdef CONFIG_CODA_FS_OLD_API - sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]); -#else + sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]); -#endif + return s; } diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 40c36f7352a6..0d9b80ec689c 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); -#ifdef CONFIG_CODA_FS_OLD_API -MODULE_VERSION("5.3.21"); -#else MODULE_VERSION("6.6"); -#endif static int __init init_coda(void) { diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 359e531094dd..ce432bca95d1 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size) inp->ih.opcode = opcode; inp->ih.pid = current->pid; inp->ih.pgid = task_pgrp_nr(current); -#ifdef CONFIG_CODA_FS_OLD_API - memset(&inp->ih.cred, 0, sizeof(struct coda_cred)); - inp->ih.cred.cr_fsuid = current->fsuid; -#else inp->ih.uid = current->fsuid; -#endif + return (void*)inp; } @@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, union inputArgs *inp; union outputArgs *outp; int insize, outsize, error; -#ifdef CONFIG_CODA_FS_OLD_API - struct coda_cred cred = { 0, }; - cred.cr_fsuid = uid; -#endif insize = SIZE(release); UPARG(CODA_CLOSE); -#ifdef CONFIG_CODA_FS_OLD_API - memcpy(&(inp->ih.cred), &cred, sizeof(cred)); -#else inp->ih.uid = uid; -#endif - inp->coda_close.VFid = *fid; inp->coda_close.flags = flags; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 18e2c548161d..5235c67e7594 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -25,7 +25,6 @@ #include <linux/slab.h> #include <linux/raid/md.h> #include <linux/kd.h> -#include <linux/dirent.h> #include <linux/route.h> #include <linux/in6.h> #include <linux/ipv6_route.h> diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 78878c5781ca..eba87ff3177b 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, if (xop->callback == NULL) wait_event(recv_wq, (op->done != 0)); else { - rv = -EINPROGRESS; + rv = FILE_LOCK_DEFERRED; goto out; } diff --git a/fs/dquot.c b/fs/dquot.c index 5ac77da19959..1346eebe74ce 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = { */ static void dqput(struct dquot *dquot) { + int ret; + if (!dquot) return; #ifdef __DQUOT_PARANOIA @@ -594,7 +596,19 @@ we_slept: if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ - dquot->dq_sb->dq_op->write_dquot(dquot); + ret = dquot->dq_sb->dq_op->write_dquot(dquot); + if (ret < 0) { + printk(KERN_ERR "VFS: cannot write quota structure on " + "device %s (error %d). Quota may get out of " + "sync!\n", dquot->dq_sb->s_id, ret); + /* + * We clear dirty bit anyway, so that we avoid + * infinite loop here + */ + spin_lock(&dq_list_lock); + clear_dquot_dirty(dquot); + spin_unlock(&dq_list_lock); + } goto we_slept; } /* Clear flag in case dquot was inactive (something bad happened) */ @@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype) char *msg = NULL; struct tty_struct *tty; - if (!need_print_warning(dquot)) + if (warntype == QUOTA_NL_IHARDBELOW || + warntype == QUOTA_NL_ISOFTBELOW || + warntype == QUOTA_NL_BHARDBELOW || + warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) return; mutex_lock(&tty_mutex); @@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war return QUOTA_OK; } +static int info_idq_free(struct dquot *dquot, ulong inodes) +{ + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || + dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit) + return QUOTA_NL_NOWARN; + + if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit) + return QUOTA_NL_ISOFTBELOW; + if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit && + dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit) + return QUOTA_NL_IHARDBELOW; + return QUOTA_NL_NOWARN; +} + +static int info_bdq_free(struct dquot *dquot, qsize_t space) +{ + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || + toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit) + return QUOTA_NL_NOWARN; + + if (toqb(dquot->dq_dqb.dqb_curspace - space) <= + dquot->dq_dqb.dqb_bsoftlimit) + return QUOTA_NL_BSOFTBELOW; + if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit && + toqb(dquot->dq_dqb.dqb_curspace - space) < + dquot->dq_dqb.dqb_bhardlimit) + return QUOTA_NL_BHARDBELOW; + return QUOTA_NL_NOWARN; +} /* * Initialize quota pointers in inode * Transaction must be started at entry @@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode) return 0; } +/* Wrapper to remove references to quota structures from inode */ +void vfs_dq_drop(struct inode *inode) +{ + /* Here we can get arbitrary inode from clear_inode() so we have + * to be careful. OTOH we don't need locking as quota operations + * are allowed to change only at mount time */ + if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op + && inode->i_sb->dq_op->drop) { + int cnt; + /* Test before calling to rule out calls from proc and such + * where we are not allowed to block. Note that this is + * actually reliable test even without the lock - the caller + * must assure that nobody can come after the DQUOT_DROP and + * add quota pointers back anyway */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt] != NODQUOT) + break; + if (cnt < MAXQUOTAS) + inode->i_sb->dq_op->drop(inode); + } +} + /* * Following four functions update i_blocks+i_bytes fields and * quota information (together with appropriate checks) @@ -1248,6 +1316,7 @@ warn_put_all: int dquot_free_space(struct inode *inode, qsize_t number) { unsigned int cnt; + char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1256,6 +1325,7 @@ out_sub: inode_sub_bytes(inode, number); return QUOTA_OK; } + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { @@ -1266,6 +1336,7 @@ out_sub: for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; + warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); dquot_decr_space(inode->i_dquot[cnt], number); } inode_sub_bytes(inode, number); @@ -1274,6 +1345,7 @@ out_sub: for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); + flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } @@ -1284,11 +1356,13 @@ out_sub: int dquot_free_inode(const struct inode *inode, unsigned long number) { unsigned int cnt; + char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { @@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; + warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number); dquot_decr_inodes(inode->i_dquot[cnt], number); } spin_unlock(&dq_data_lock); @@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); + flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } @@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) struct dquot *transfer_to[MAXQUOTAS]; int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; - char warntype[MAXQUOTAS]; + char warntype_to[MAXQUOTAS]; + char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) /* Clear the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { transfer_to[cnt] = transfer_from[cnt] = NODQUOT; - warntype[cnt] = QUOTA_NL_NOWARN; + warntype_to[cnt] = QUOTA_NL_NOWARN; } down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ @@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (transfer_to[cnt] == NODQUOT) continue; transfer_from[cnt] = inode->i_dquot[cnt]; - if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || - check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) + if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) == + NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, + warntype_to + cnt) == NO_QUOTA) goto warn_put_all; } @@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { + warntype_from_inodes[cnt] = + info_idq_free(transfer_from[cnt], 1); + warntype_from_space[cnt] = + info_bdq_free(transfer_from[cnt], space); dquot_decr_inodes(transfer_from[cnt], 1); dquot_decr_space(transfer_from[cnt], space); } @@ -1400,7 +1482,9 @@ warn_put_all: if (transfer_to[cnt]) mark_dquot_dirty(transfer_to[cnt]); } - flush_warnings(transfer_to, warntype); + flush_warnings(transfer_to, warntype_to); + flush_warnings(transfer_from, warntype_from_inodes); + flush_warnings(transfer_from, warntype_from_space); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) @@ -1412,6 +1496,18 @@ warn_put_all: return ret; } +/* Wrapper for transferring ownership of an inode */ +int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) +{ + if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { + vfs_dq_init(inode); + if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) + return 1; + } + return 0; +} + + /* * Write info of quota file to disk */ @@ -1752,6 +1848,22 @@ out: return error; } +/* Wrapper to turn on quotas when remounting rw */ +int vfs_dq_quota_on_remount(struct super_block *sb) +{ + int cnt; + int ret = 0, err; + + if (!sb->s_qcop || !sb->s_qcop->quota_on) + return -ENOSYS; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); + if (err < 0 && !ret) + ret = err; + } + return ret; +} + /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { @@ -2087,8 +2199,11 @@ EXPORT_SYMBOL(dquot_release); EXPORT_SYMBOL(dquot_mark_dquot_dirty); EXPORT_SYMBOL(dquot_initialize); EXPORT_SYMBOL(dquot_drop); +EXPORT_SYMBOL(vfs_dq_drop); EXPORT_SYMBOL(dquot_alloc_space); EXPORT_SYMBOL(dquot_alloc_inode); EXPORT_SYMBOL(dquot_free_space); EXPORT_SYMBOL(dquot_free_inode); EXPORT_SYMBOL(dquot_transfer); +EXPORT_SYMBOL(vfs_dq_transfer); +EXPORT_SYMBOL(vfs_dq_quota_on_remount); diff --git a/fs/exec.c b/fs/exec.c index 190ed1f92774..5e559013e303 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -25,19 +25,18 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> -#include <linux/mman.h> +#include <linux/mm.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/smp_lock.h> +#include <linux/swap.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> #include <linux/personality.h> #include <linux/binfmts.h> -#include <linux/swap.h> #include <linux/utsname.h> #include <linux/pid_namespace.h> #include <linux/module.h> @@ -47,7 +46,6 @@ #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> -#include <linux/rmap.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> @@ -724,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm) * Make sure that if there is a core dump in progress * for the old mm, we get out and die instead of going * through with the exec. We must hold mmap_sem around - * checking core_waiters and changing tsk->mm. The - * core-inducing thread will increment core_waiters for - * each thread whose ->mm == old_mm. + * checking core_state and changing tsk->mm. */ down_read(&old_mm->mmap_sem); - if (unlikely(old_mm->core_waiters)) { + if (unlikely(old_mm->core_state)) { up_read(&old_mm->mmap_sem); return -EINTR; } @@ -1328,6 +1324,7 @@ int do_execve(char * filename, if (retval < 0) goto out; + current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ @@ -1382,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt); * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(char *corename, const char *pattern, long signr) +static int format_corename(char *corename, int nr_threads, long signr) { - const char *pat_ptr = pattern; + const char *pat_ptr = core_pattern; + int ispipe = (*pat_ptr == '|'); char *out_ptr = corename; char *const out_end = corename + CORENAME_MAX_SIZE; int rc; int pid_in_pattern = 0; - int ispipe = 0; - - if (*pattern == '|') - ispipe = 1; /* Repeat as long as we have more pattern to process and more output space */ @@ -1493,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr) * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. */ if (!ispipe && !pid_in_pattern - && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { + && (core_uses_pid || nr_threads)) { rc = snprintf(out_ptr, out_end - out_ptr, ".%d", task_tgid_vnr(current)); if (rc > out_end - out_ptr) @@ -1505,9 +1499,10 @@ out: return ispipe; } -static void zap_process(struct task_struct *start) +static int zap_process(struct task_struct *start) { struct task_struct *t; + int nr = 0; start->signal->flags = SIGNAL_GROUP_EXIT; start->signal->group_stop_count = 0; @@ -1515,72 +1510,99 @@ static void zap_process(struct task_struct *start) t = start; do { if (t != current && t->mm) { - t->mm->core_waiters++; sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); + nr++; } - } while ((t = next_thread(t)) != start); + } while_each_thread(start, t); + + return nr; } static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - int exit_code) + struct core_state *core_state, int exit_code) { struct task_struct *g, *p; unsigned long flags; - int err = -EAGAIN; + int nr = -EAGAIN; spin_lock_irq(&tsk->sighand->siglock); if (!signal_group_exit(tsk->signal)) { + mm->core_state = core_state; tsk->signal->group_exit_code = exit_code; - zap_process(tsk); - err = 0; + nr = zap_process(tsk); } spin_unlock_irq(&tsk->sighand->siglock); - if (err) - return err; + if (unlikely(nr < 0)) + return nr; - if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) + if (atomic_read(&mm->mm_users) == nr + 1) goto done; - + /* + * We should find and kill all tasks which use this mm, and we should + * count them correctly into ->nr_threads. We don't take tasklist + * lock, but this is safe wrt: + * + * fork: + * None of sub-threads can fork after zap_process(leader). All + * processes which were created before this point should be + * visible to zap_threads() because copy_process() adds the new + * process to the tail of init_task.tasks list, and lock/unlock + * of ->siglock provides a memory barrier. + * + * do_exit: + * The caller holds mm->mmap_sem. This means that the task which + * uses this mm can't pass exit_mm(), so it can't exit or clear + * its ->mm. + * + * de_thread: + * It does list_replace_rcu(&leader->tasks, ¤t->tasks), + * we must see either old or new leader, this does not matter. + * However, it can change p->sighand, so lock_task_sighand(p) + * must be used. Since p->mm != NULL and we hold ->mmap_sem + * it can't fail. + * + * Note also that "g" can be the old leader with ->mm == NULL + * and already unhashed and thus removed from ->thread_group. + * This is OK, __unhash_process()->list_del_rcu() does not + * clear the ->next pointer, we will find the new leader via + * next_thread(). + */ rcu_read_lock(); for_each_process(g) { if (g == tsk->group_leader) continue; - + if (g->flags & PF_KTHREAD) + continue; p = g; do { if (p->mm) { - if (p->mm == mm) { - /* - * p->sighand can't disappear, but - * may be changed by de_thread() - */ + if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); - zap_process(p); + nr += zap_process(p); unlock_task_sighand(p, &flags); } break; } - } while ((p = next_thread(p)) != g); + } while_each_thread(g, p); } rcu_read_unlock(); done: - return mm->core_waiters; + atomic_set(&core_state->nr_threads, nr); + return nr; } -static int coredump_wait(int exit_code) +static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion startup_done; struct completion *vfork_done; int core_waiters; - init_completion(&mm->core_done); - init_completion(&startup_done); - mm->core_startup_done = &startup_done; - - core_waiters = zap_threads(tsk, mm, exit_code); + init_completion(&core_state->startup); + core_state->dumper.task = tsk; + core_state->dumper.next = NULL; + core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); if (unlikely(core_waiters < 0)) @@ -1597,12 +1619,32 @@ static int coredump_wait(int exit_code) } if (core_waiters) - wait_for_completion(&startup_done); + wait_for_completion(&core_state->startup); fail: - BUG_ON(mm->core_waiters); return core_waiters; } +static void coredump_finish(struct mm_struct *mm) +{ + struct core_thread *curr, *next; + struct task_struct *task; + + next = mm->core_state->dumper.next; + while ((curr = next) != NULL) { + next = curr->next; + task = curr->task; + /* + * see exit_mm(), curr->task must not see + * ->task == NULL before we read ->next. + */ + smp_mb(); + curr->task = NULL; + wake_up_process(task); + } + + mm->core_state = NULL; +} + /* * set_dumpable converts traditional three-value dumpable to two flags and * stores them into mm->flags. It modifies lower two bits of mm->flags, but @@ -1654,6 +1696,7 @@ int get_dumpable(struct mm_struct *mm) int do_coredump(long signr, int exit_code, struct pt_regs * regs) { + struct core_state core_state; char corename[CORENAME_MAX_SIZE + 1]; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; @@ -1677,7 +1720,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) /* * If another thread got here first, or we are not dumpable, bail out. */ - if (mm->core_waiters || !get_dumpable(mm)) { + if (mm->core_state || !get_dumpable(mm)) { up_write(&mm->mmap_sem); goto fail; } @@ -1692,7 +1735,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) current->fsuid = 0; /* Dump root private */ } - retval = coredump_wait(exit_code); + retval = coredump_wait(exit_code, &core_state); if (retval < 0) goto fail; @@ -1707,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * uses lock_kernel() */ lock_kernel(); - ispipe = format_corename(corename, core_pattern, signr); + ispipe = format_corename(corename, retval, signr); unlock_kernel(); /* * Don't bother to check the RLIMIT_CORE value if core_pattern points @@ -1786,7 +1829,7 @@ fail_unlock: argv_free(helper_argv); current->fsuid = fsuid; - complete_all(&mm->core_done); + coredump_finish(mm); fail: return retval; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index ef50cbc792db..31308a3b0b8b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -31,6 +31,7 @@ #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/log2.h> +#include <linux/quotaops.h> #include <asm/uaccess.h> #include "ext2.h" #include "xattr.h" diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index eaa23d2d5213..70c0dbdcdcb7 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -14,7 +14,7 @@ static size_t ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const int prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 83ee149f353d..e8219f8eae9f 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -12,13 +12,11 @@ #include <linux/ext2_fs.h> #include "xattr.h" -#define XATTR_TRUSTED_PREFIX "trusted." - static size_t ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!capable(CAP_SYS_ADMIN)) diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index f383e7c3a7b5..92495d28c62f 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -11,13 +11,11 @@ #include "ext2.h" #include "xattr.h" -#define XATTR_USER_PREFIX "user." - static size_t ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!test_opt(inode->i_sb, XATTR_USER)) diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 8ca3bfd72427..2eea96ec78ed 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root) while (n) { /* Do the node's children first */ - if ((n)->rb_left) { + if (n->rb_left) { n = n->rb_left; continue; } @@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root) parent->rb_right = NULL; n = parent; } - root->rb_node = NULL; } -static struct dir_private_info *create_dir_info(loff_t pos) +static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) { struct dir_private_info *p; - p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); + p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); if (!p) return NULL; - p->root.rb_node = NULL; - p->curr_node = NULL; - p->extra_fname = NULL; - p->last_pos = 0; p->curr_hash = pos2maj_hash(pos); p->curr_minor_hash = pos2min_hash(pos); - p->next_hash = 0; return p; } @@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp, int ret; if (!info) { - info = create_dir_info(filp->f_pos); + info = ext3_htree_create_dir_info(filp->f_pos); if (!info) return -ENOMEM; filp->private_data = info; diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 77126821b2e9..47b678d73e7a 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) if (IS_ERR(inode)) goto iget_failed; + /* + * If the orphans has i_nlinks > 0 then it should be able to be + * truncated, otherwise it won't be removed from the orphan list + * during processing and an infinite loop will result. + */ + if (inode->i_nlink && !ext3_can_truncate(inode)) + goto bad_orphan; + if (NEXT_ORPHAN(inode) > max_ino) goto bad_orphan; brelse(bitmap_bh); @@ -690,6 +698,7 @@ bad_orphan: printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", NEXT_ORPHAN(inode)); printk(KERN_NOTICE "max_ino=%lu\n", max_ino); + printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); /* Avoid freeing blocks if we got a bad deleted inode */ if (inode->i_nlink == 0) inode->i_blocks = 0; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 6ae4ecf3ce40..3bf07d70b914 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, if (this_bh) { BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, this_bh); + + /* + * The buffer head should have an attached journal head at this + * point. However, if the data is corrupted and an indirect + * block pointed to itself, it would have been detached when + * the block was cleared. Check for this instead of OOPSing. + */ + if (bh2jh(this_bh)) + ext3_journal_dirty_metadata(handle, this_bh); + else + ext3_error(inode->i_sb, "ext3_free_data", + "circular indirect block detected, " + "inode=%lu, block=%llu", + inode->i_ino, + (unsigned long long)this_bh->b_blocknr); } } @@ -2253,6 +2267,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, } } +int ext3_can_truncate(struct inode *inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return 0; + if (S_ISREG(inode->i_mode)) + return 1; + if (S_ISDIR(inode->i_mode)) + return 1; + if (S_ISLNK(inode->i_mode)) + return !ext3_inode_is_fast_symlink(inode); + return 0; +} + /* * ext3_truncate() * @@ -2297,12 +2324,7 @@ void ext3_truncate(struct inode *inode) unsigned blocksize = inode->i_sb->s_blocksize; struct page *page; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (ext3_inode_is_fast_symlink(inode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (!ext3_can_truncate(inode)) return; /* @@ -2513,6 +2535,16 @@ static int __ext3_get_inode_loc(struct inode *inode, } if (!buffer_uptodate(bh)) { lock_buffer(bh); + + /* + * If the buffer has the write error flag, we have failed + * to write out another inode in the same block. In this + * case, we don't have to read the block because we may + * read the old inode data successfully. + */ + if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) + set_buffer_uptodate(bh); + if (buffer_uptodate(bh)) { /* someone brought it uptodate while we waited */ unlock_buffer(bh); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 0b8cf80154f1..de13e919cd81 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) { unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - EXT3_DIR_REC_LEN(2) - infosize; - return 0? 20: entry_space / sizeof(struct dx_entry); + return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit (struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); - return 0? 22: entry_space / sizeof(struct dx_entry); + return entry_space / sizeof(struct dx_entry); } /* @@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, de = (struct ext3_dir_entry_2 *) bh->b_data; top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) - if (ext3_match (namelen, name, de)) { - if (!ext3_check_dir_entry("ext3_find_entry", - dir, de, bh, - (block<<EXT3_BLOCK_SIZE_BITS(sb)) - +((char *)de - bh->b_data))) { - brelse (bh); + for (; de < top; de = ext3_next_entry(de)) { + int off = (block << EXT3_BLOCK_SIZE_BITS(sb)) + + ((char *) de - bh->b_data); + + if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) { + brelse(bh); *err = ERR_BAD_DX_DIR; goto errout; } - *res_dir = de; - dx_release (frames); - return bh; + + if (ext3_match(namelen, name, de)) { + *res_dir = de; + dx_release(frames); + return bh; + } } brelse (bh); /* Check to see if we should continue to search */ diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 2845425077e8..615788c6843a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype; + int qtype, qfmt; char *qname; #endif @@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + !sbi->s_qf_names[qtype]) { printk(KERN_ERR - "EXT3-fs: Cannot change journalled " + "EXT3-fs: Cannot change journaled " "quota options when quota turned on.\n"); return 0; } @@ -1056,9 +1058,11 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT3-fs: Cannot change " - "journalled quota options when " + "journaled quota options when " "quota turned on.\n"); return 0; } @@ -1069,10 +1073,20 @@ clear_qf_name: sbi->s_qf_names[qtype] = NULL; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; - break; + qfmt = QFMT_VFS_OLD; + goto set_qf_format; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + qfmt = QFMT_VFS_V0; +set_qf_format: + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_jquota_fmt != qfmt) { + printk(KERN_ERR "EXT3-fs: Cannot change " + "journaled quota options when " + "quota turned on.\n"); + return 0; + } + sbi->s_jquota_fmt = qfmt; break; case Opt_quota: case Opt_usrquota: @@ -1084,7 +1098,8 @@ clear_qf_name: set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: - if (sb_any_quota_enabled(sb)) { + if (sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) { printk(KERN_ERR "EXT3-fs: Cannot change quota " "options when quota turned on.\n"); return 0; @@ -1169,14 +1184,14 @@ clear_qf_name: } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT3-fs: journalled quota format " + printk(KERN_ERR "EXT3-fs: journaled quota format " "not specified.\n"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT3-fs: journalled quota format " - "specified with no journalling " + printk(KERN_ERR "EXT3-fs: journaled quota format " + "specified with no journaling " "enabled.\n"); return 0; } @@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, int ret = ext3_quota_on_mount(sb, i); if (ret < 0) printk(KERN_ERR - "EXT3-fs: Cannot turn on journalled " + "EXT3-fs: Cannot turn on journaled " "quota: error %d\n", ret); } } @@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot) static int ext3_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ + /* Are we journaling quotas? */ if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); @@ -2759,23 +2774,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, if (!test_opt(sb, QUOTA)) return -EINVAL; - /* Not journalling quota or remount? */ - if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] && - !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount) + /* When remounting, no checks are needed and in fact, path is NULL */ + if (remount) return vfs_quota_on(sb, type, format_id, path, remount); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; + /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { path_put(&nd.path); return -EXDEV; } - /* Quotafile not in fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) - printk(KERN_WARNING - "EXT3-fs: Quota file not on filesystem root. " - "Journalled quota will not work.\n"); + /* Journaling quota? */ + if (EXT3_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT3-fs: Quota file not on filesystem root. " + "Journaled quota will not work.\n"); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (ext3_should_journal_data(nd.path.dentry->d_inode)) { + /* + * We don't need to lock updates but journal_flush() could + * otherwise be livelocked... + */ + journal_lock_updates(EXT3_SB(sb)->s_journal); + journal_flush(EXT3_SB(sb)->s_journal); + journal_unlock_updates(EXT3_SB(sb)->s_journal); + } + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path, remount); } diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 821efaf2b94e..37b81097bdf2 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -15,7 +15,7 @@ static size_t ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index 0327497a55ce..c7c41a410c4b 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c @@ -13,13 +13,11 @@ #include <linux/ext3_fs.h> #include "xattr.h" -#define XATTR_TRUSTED_PREFIX "trusted." - static size_t ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!capable(CAP_SYS_ADMIN)) diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 1abd8f92c440..430fe63b31b3 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -12,13 +12,11 @@ #include <linux/ext3_fs.h> #include "xattr.h" -#define XATTR_USER_PREFIX "user." - static size_t ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!test_opt(inode->i_sb, XATTR_USER)) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 34541d06e626..cd4a0162e10d 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -17,7 +17,6 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/msdos_fs.h> -#include <linux/dirent.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/compat.h> @@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos, * but ignore that right now. * Ahem... Stack smashing in ring 0 isn't fun. Fixed. */ -static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, +static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, int uni_xlate, struct nls_table *nls) { - wchar_t *ip, ec; + const wchar_t *ip; + wchar_t ec; unsigned char *op, nc; int charlen; int k; @@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, return (op - ascii); } +static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, + unsigned char *buf, int size) +{ + if (sbi->options.utf8) + return utf8_wcstombs(buf, uni, size); + else + return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, + sbi->nls_io); +} + static inline int fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) { @@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size, return len; } +static inline int fat_name_match(struct msdos_sb_info *sbi, + const unsigned char *a, int a_len, + const unsigned char *b, int b_len) +{ + if (a_len != b_len) + return 0; + + if (sbi->options.name_check != 's') + return !nls_strnicmp(sbi->nls_io, a, b, a_len); + else + return !memcmp(a, b, a_len); +} + enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, }; /** @@ -302,6 +325,19 @@ parse_long: } /* + * Maximum buffer size of short name. + * [(MSDOS_NAME + '.') * max one char + nul] + * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] + */ +#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) +/* + * Maximum buffer size of unicode chars from slots. + * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] + */ +#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) +#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) + +/* * Return values: negative -> error, 0 -> not found, positive -> found, * value is the total amount of slots, including the shortname entry. */ @@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh = NULL; struct msdos_dir_entry *de; - struct nls_table *nls_io = sbi->nls_io; struct nls_table *nls_disk = sbi->nls_disk; - wchar_t bufuname[14]; unsigned char nr_slots; - int xlate_len; + wchar_t bufuname[14]; wchar_t *unicode = NULL; unsigned char work[MSDOS_NAME]; - unsigned char *bufname = NULL; - int uni_xlate = sbi->options.unicode_xlate; - int utf8 = sbi->options.utf8; - int anycase = (sbi->options.name_check != 's'); + unsigned char bufname[FAT_MAX_SHORT_SIZE]; unsigned short opt_shortname = sbi->options.shortname; loff_t cpos = 0; - int chl, i, j, last_u, err; - - bufname = __getname(); - if (!bufname) - return -ENOMEM; + int chl, i, j, last_u, err, len; err = -ENOENT; - while(1) { + while (1) { if (fat_get_entry(inode, &cpos, &bh, &de) == -1) - goto EODir; + goto end_of_dir; parse_record: nr_slots = 0; if (de->name[0] == DELETED_FLAG) @@ -353,7 +380,7 @@ parse_record: else if (status == PARSE_NOT_LONGNAME) goto parse_record; else if (status == PARSE_EOF) - goto EODir; + goto end_of_dir; } memcpy(work, de->name, sizeof(de->name)); @@ -394,30 +421,24 @@ parse_record: if (!last_u) continue; + /* Compare shortname */ bufuname[last_u] = 0x0000; - xlate_len = utf8 - ?utf8_wcstombs(bufname, bufuname, PATH_MAX) - :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io); - if (xlate_len == name_len) - if ((!anycase && !memcmp(name, bufname, xlate_len)) || - (anycase && !nls_strnicmp(nls_io, name, bufname, - xlate_len))) - goto Found; + len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); + if (fat_name_match(sbi, name, name_len, bufname, len)) + goto found; if (nr_slots) { - xlate_len = utf8 - ?utf8_wcstombs(bufname, unicode, PATH_MAX) - :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io); - if (xlate_len != name_len) - continue; - if ((!anycase && !memcmp(name, bufname, xlate_len)) || - (anycase && !nls_strnicmp(nls_io, name, bufname, - xlate_len))) - goto Found; + void *longname = unicode + FAT_MAX_UNI_CHARS; + int size = PATH_MAX - FAT_MAX_UNI_SIZE; + + /* Compare longname */ + len = fat_uni_to_x8(sbi, unicode, longname, size); + if (fat_name_match(sbi, name, name_len, longname, len)) + goto found; } } -Found: +found: nr_slots++; /* include the de */ sinfo->slot_off = cpos - nr_slots * sizeof(*de); sinfo->nr_slots = nr_slots; @@ -425,9 +446,7 @@ Found: sinfo->bh = bh; sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); err = 0; -EODir: - if (bufname) - __putname(bufname); +end_of_dir: if (unicode) __putname(unicode); @@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh; struct msdos_dir_entry *de; - struct nls_table *nls_io = sbi->nls_io; struct nls_table *nls_disk = sbi->nls_disk; - unsigned char long_slots; - const char *fill_name; - int fill_len; + unsigned char nr_slots; wchar_t bufuname[14]; wchar_t *unicode = NULL; - unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; - unsigned long lpos, dummy, *furrfu = &lpos; - int uni_xlate = sbi->options.unicode_xlate; + unsigned char c, work[MSDOS_NAME]; + unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname; + unsigned short opt_shortname = sbi->options.shortname; int isvfat = sbi->options.isvfat; - int utf8 = sbi->options.utf8; int nocase = sbi->options.nocase; - unsigned short opt_shortname = sbi->options.shortname; + const char *fill_name = NULL; unsigned long inum; - int chi, chl, i, i2, j, last, last_u, dotoffset = 0; + unsigned long lpos, dummy, *furrfu = &lpos; loff_t cpos; + int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0; int ret = 0; lock_super(sb); @@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, cpos = 0; } } - if (cpos & (sizeof(struct msdos_dir_entry)-1)) { + if (cpos & (sizeof(struct msdos_dir_entry) - 1)) { ret = -ENOENT; goto out; } bh = NULL; -GetNew: +get_new: if (fat_get_entry(inode, &cpos, &bh, &de) == -1) - goto EODir; + goto end_of_dir; parse_record: - long_slots = 0; - /* Check for long filename entry */ - if (isvfat) { + nr_slots = 0; + /* + * Check for long filename entry, but if short_only, we don't + * need to parse long filename. + */ + if (isvfat && !short_only) { if (de->name[0] == DELETED_FLAG) - goto RecEnd; + goto record_end; if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME)) - goto RecEnd; + goto record_end; if (de->attr != ATTR_EXT && IS_FREE(de->name)) - goto RecEnd; + goto record_end; } else { if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name)) - goto RecEnd; + goto record_end; } if (isvfat && de->attr == ATTR_EXT) { int status = fat_parse_long(inode, &cpos, &bh, &de, - &unicode, &long_slots); + &unicode, &nr_slots); if (status < 0) { filp->f_pos = cpos; ret = status; goto out; } else if (status == PARSE_INVALID) - goto RecEnd; + goto record_end; else if (status == PARSE_NOT_LONGNAME) goto parse_record; else if (status == PARSE_EOF) - goto EODir; + goto end_of_dir; + + if (nr_slots) { + void *longname = unicode + FAT_MAX_UNI_CHARS; + int size = PATH_MAX - FAT_MAX_UNI_SIZE; + int len = fat_uni_to_x8(sbi, unicode, longname, size); + + fill_name = longname; + fill_len = len; + /* !both && !short_only, so we don't need shortname. */ + if (!both) + goto start_filldir; + } } if (sbi->options.dotsOK) { @@ -587,12 +618,32 @@ parse_record: } } if (!last) - goto RecEnd; + goto record_end; i = last + dotoffset; j = last_u; - lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry); + if (isvfat) { + bufuname[j] = 0x0000; + i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); + } + if (nr_slots) { + /* hack for fat_ioctl_filldir() */ + struct fat_ioctl_filldir_callback *p = dirent; + + p->longname = fill_name; + p->long_len = fill_len; + p->shortname = bufname; + p->short_len = i; + fill_name = NULL; + fill_len = 0; + } else { + fill_name = bufname; + fill_len = i; + } + +start_filldir: + lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) inum = inode->i_ino; else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { @@ -607,49 +658,17 @@ parse_record: inum = iunique(sb, MSDOS_ROOT_INO); } - if (isvfat) { - bufuname[j] = 0x0000; - i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname)) - : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io); - } - - fill_name = bufname; - fill_len = i; - if (!short_only && long_slots) { - /* convert the unicode long name. 261 is maximum size - * of unicode buffer. (13 * slots + nul) */ - void *longname = unicode + 261; - int buf_size = PATH_MAX - (261 * sizeof(unicode[0])); - int long_len = utf8 - ? utf8_wcstombs(longname, unicode, buf_size) - : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io); - - if (!both) { - fill_name = longname; - fill_len = long_len; - } else { - /* hack for fat_ioctl_filldir() */ - struct fat_ioctl_filldir_callback *p = dirent; - - p->longname = longname; - p->long_len = long_len; - p->shortname = bufname; - p->short_len = i; - fill_name = NULL; - fill_len = 0; - } - } if (filldir(dirent, fill_name, fill_len, *furrfu, inum, (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) - goto FillFailed; + goto fill_failed; -RecEnd: +record_end: furrfu = &lpos; filp->f_pos = cpos; - goto GetNew; -EODir: + goto get_new; +end_of_dir: filp->f_pos = cpos; -FillFailed: +fill_failed: brelse(bh); if (unicode) __putname(unicode); @@ -715,7 +734,7 @@ efault: \ return -EFAULT; \ } -FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent) +FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent) static int fat_ioctl_readdir(struct inode *inode, struct file *filp, void __user *dirent, filldir_t filldir, @@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp, static int fat_dir_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { - struct dirent __user *d1 = (struct dirent __user *)arg; + struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; int short_only, both; switch (cmd) { @@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp, return fat_generic_ioctl(inode, filp, cmd, arg); } - if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) + if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) return -EFAULT; /* * Yes, we don't need this put_user() absolutely. However old @@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) goto error_free; } - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de = (struct msdos_dir_entry *)bhs[0]->b_data; /* filling the new directory slots ("." and ".." entries) */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 46a4508ffd2e..23676f9d79ce 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; inode->i_mtime.tv_sec = - date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); + date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), + sbi->options.tz_utc); inode->i_mtime.tv_nsec = 0; if (sbi->options.isvfat) { int secs = de->ctime_cs / 100; int csecs = de->ctime_cs % 100; inode->i_ctime.tv_sec = date_dos2unix(le16_to_cpu(de->ctime), - le16_to_cpu(de->cdate)) + secs; + le16_to_cpu(de->cdate), + sbi->options.tz_utc) + secs; inode->i_ctime.tv_nsec = csecs * 10000000; inode->i_atime.tv_sec = - date_dos2unix(0, le16_to_cpu(de->adate)); + date_dos2unix(0, le16_to_cpu(de->adate), + sbi->options.tz_utc); inode->i_atime.tv_nsec = 0; } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -591,11 +594,14 @@ retry: raw_entry->attr = fat_attr(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); - fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); + fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, + &raw_entry->date, sbi->options.tz_utc); if (sbi->options.isvfat) { __le16 atime; - fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); - fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); + fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, + &raw_entry->cdate, sbi->options.tz_utc); + fat_date_unix2dos(inode->i_atime.tv_sec, &atime, + &raw_entry->adate, sbi->options.tz_utc); raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + inode->i_ctime.tv_nsec / 10000000; } @@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) } if (sbi->options.flush) seq_puts(m, ",flush"); + if (opts->tz_utc) + seq_puts(m, ",tz=UTC"); return 0; } @@ -848,7 +856,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, }; static match_table_t fat_tokens = { @@ -883,6 +891,7 @@ static match_table_t fat_tokens = { {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, {Opt_flush, "flush"}, + {Opt_tz_utc, "tz=UTC"}, {Opt_err, NULL}, }; static match_table_t msdos_tokens = { @@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->utf8 = opts->unicode_xlate = 0; opts->numtail = 1; opts->usefree = opts->nocase = 0; + opts->tz_utc = 0; *debug = 0; if (!options) - return 0; + goto out; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_flush: opts->flush = 1; break; + case Opt_tz_utc: + opts->tz_utc = 1; + break; /* msdos specific */ case Opt_dots: @@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, return -EINVAL; } } + +out: /* UTF-8 doesn't provide FAT semantics */ if (!strcmp(opts->iocharset, "utf8")) { printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" - " for FAT filesystems, filesystem will be case sensitive!\n"); + " for FAT filesystems, filesystem will be " + "case sensitive!\n"); } /* If user doesn't specify allow_utime, it's initialized from dmask. */ diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 61f23511eacf..79fb98ad36d4 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -142,7 +142,7 @@ static int day_n[] = { }; /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ -int date_dos2unix(unsigned short time, unsigned short date) +int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) { int month, year, secs; @@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date) ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && month < 2 ? 1 : 0)+3653); /* days since 1.1.70 plus 80's leap day */ - secs += sys_tz.tz_minuteswest*60; + if (!tz_utc) + secs += sys_tz.tz_minuteswest*60; return secs; } /* Convert linear UNIX date to a MS-DOS time/date pair. */ -void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) +void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) { int day, year, nl_day, month; - unix_date -= sys_tz.tz_minuteswest*60; + if (!tz_utc) + unix_date -= sys_tz.tz_minuteswest*60; /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ if (unix_date < 315532800) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2060bf06b906..51d0035ff07e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode) * timeout is unknown (unlink, rmdir, rename and in some cases * lookup) */ -static void fuse_invalidate_entry_cache(struct dentry *entry) +void fuse_invalidate_entry_cache(struct dentry *entry) { fuse_dentry_settime(entry, 0); } @@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry) fuse_invalidate_entry_cache(entry); } -static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, - struct dentry *entry, +static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req, + u64 nodeid, struct qstr *name, struct fuse_entry_out *outarg) { - struct fuse_conn *fc = get_fuse_conn(dir); - memset(outarg, 0, sizeof(struct fuse_entry_out)); req->in.h.opcode = FUSE_LOOKUP; - req->in.h.nodeid = get_node_id(dir); + req->in.h.nodeid = nodeid; req->in.numargs = 1; - req->in.args[0].size = entry->d_name.len + 1; - req->in.args[0].value = entry->d_name.name; + req->in.args[0].size = name->len + 1; + req->in.args[0].value = name->name; req->out.numargs = 1; if (fc->minor < 9) req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; @@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) attr_version = fuse_get_attr_version(fc); parent = dget_parent(entry); - fuse_lookup_init(req, parent->d_inode, entry, &outarg); + fuse_lookup_init(fc, req, get_node_id(parent->d_inode), + &entry->d_name, &outarg); request_send(fc, req); dput(parent); err = req->out.h.error; @@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid) return !nodeid || nodeid == FUSE_ROOT_ID; } -static struct dentry_operations fuse_dentry_operations = { +struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, }; @@ -239,85 +238,127 @@ int fuse_valid_type(int m) * Add a directory inode to a dentry, ensuring that no other dentry * refers to this inode. Called with fc->inst_mutex. */ -static int fuse_d_add_directory(struct dentry *entry, struct inode *inode) +static struct dentry *fuse_d_add_directory(struct dentry *entry, + struct inode *inode) { struct dentry *alias = d_find_alias(inode); - if (alias) { + if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { /* This tries to shrink the subtree below alias */ fuse_invalidate_entry(alias); dput(alias); if (!list_empty(&inode->i_dentry)) - return -EBUSY; + return ERR_PTR(-EBUSY); + } else { + dput(alias); } - d_add(entry, inode); - return 0; + return d_splice_alias(inode, entry); } -static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, - struct nameidata *nd) +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, + struct fuse_entry_out *outarg, struct inode **inode) { - int err; - struct fuse_entry_out outarg; - struct inode *inode = NULL; - struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_req *req; struct fuse_req *forget_req; u64 attr_version; + int err; - if (entry->d_name.len > FUSE_NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); + *inode = NULL; + err = -ENAMETOOLONG; + if (name->len > FUSE_NAME_MAX) + goto out; req = fuse_get_req(fc); + err = PTR_ERR(req); if (IS_ERR(req)) - return ERR_CAST(req); + goto out; forget_req = fuse_get_req(fc); + err = PTR_ERR(forget_req); if (IS_ERR(forget_req)) { fuse_put_request(fc, req); - return ERR_CAST(forget_req); + goto out; } attr_version = fuse_get_attr_version(fc); - fuse_lookup_init(req, dir, entry, &outarg); + fuse_lookup_init(fc, req, nodeid, name, outarg); request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT, but with valid timeout */ - if (!err && outarg.nodeid && - (invalid_nodeid(outarg.nodeid) || - !fuse_valid_type(outarg.attr.mode))) - err = -EIO; - if (!err && outarg.nodeid) { - inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr, entry_attr_timeout(&outarg), - attr_version); - if (!inode) { - fuse_send_forget(fc, forget_req, outarg.nodeid, 1); - return ERR_PTR(-ENOMEM); - } + if (err || !outarg->nodeid) + goto out_put_forget; + + err = -EIO; + if (!outarg->nodeid) + goto out_put_forget; + if (!fuse_valid_type(outarg->attr.mode)) + goto out_put_forget; + + *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, + &outarg->attr, entry_attr_timeout(outarg), + attr_version); + err = -ENOMEM; + if (!*inode) { + fuse_send_forget(fc, forget_req, outarg->nodeid, 1); + goto out; } + err = 0; + + out_put_forget: fuse_put_request(fc, forget_req); - if (err && err != -ENOENT) - return ERR_PTR(err); + out: + return err; +} + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, + struct nameidata *nd) +{ + int err; + struct fuse_entry_out outarg; + struct inode *inode; + struct dentry *newent; + struct fuse_conn *fc = get_fuse_conn(dir); + bool outarg_valid = true; + + err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, + &outarg, &inode); + if (err == -ENOENT) { + outarg_valid = false; + err = 0; + } + if (err) + goto out_err; + + err = -EIO; + if (inode && get_node_id(inode) == FUSE_ROOT_ID) + goto out_iput; if (inode && S_ISDIR(inode->i_mode)) { mutex_lock(&fc->inst_mutex); - err = fuse_d_add_directory(entry, inode); + newent = fuse_d_add_directory(entry, inode); mutex_unlock(&fc->inst_mutex); - if (err) { - iput(inode); - return ERR_PTR(err); - } - } else - d_add(entry, inode); + err = PTR_ERR(newent); + if (IS_ERR(newent)) + goto out_iput; + } else { + newent = d_splice_alias(inode, entry); + } + entry = newent ? newent : entry; entry->d_op = &fuse_dentry_operations; - if (!err) + if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else fuse_invalidate_entry_cache(entry); - return NULL; + + return newent; + + out_iput: + iput(inode); + out_err: + return ERR_PTR(err); } /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8092f0d9fd1f..67ff2c6a8f63 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; int err; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + /* NLM needs asynchronous locks, which we don't support yet */ + return -ENOLCK; + } + /* Unlock on close is handled by the flush method */ if (fl->fl_flags & FL_CLOSE) return 0; @@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (cmd == F_GETLK) { + if (cmd == F_CANCELLK) { + err = 0; + } else if (cmd == F_GETLK) { if (fc->no_lock) { posix_test_lock(file, fl); err = 0; @@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) err = fuse_getlk(file, fl); } else { if (fc->no_lock) - err = posix_lock_file_wait(file, fl); + err = posix_lock_file(file, fl, NULL); else err = fuse_setlk(file, fl, 0); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index bae948657c4f..3a876076bdd1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -363,6 +363,9 @@ struct fuse_conn { /** Do not send separate SETATTR request before open(O_TRUNC) */ unsigned atomic_o_trunc : 1; + /** Filesystem supports NFS exporting. Only set in INIT */ + unsigned export_support : 1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode) /** Device operations */ extern const struct file_operations fuse_dev_operations; +extern struct dentry_operations fuse_dentry_operations; + /** * Get a filled in inode */ @@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, + struct fuse_entry_out *outarg, struct inode **inode); + /** * Send FORGET command */ @@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc); */ void fuse_invalidate_attr(struct inode *inode); +void fuse_invalidate_entry_cache(struct dentry *entry); + /** * Acquire reference to fuse_conn */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3141690558c8..7d2f7d6e22e2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -18,6 +18,7 @@ #include <linux/statfs.h> #include <linux/random.h> #include <linux/sched.h> +#include <linux/exportfs.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode) return fuse_iget(sb, 1, 0, &attr, 0, 0); } +struct fuse_inode_handle +{ + u64 nodeid; + u32 generation; +}; + +static struct dentry *fuse_get_dentry(struct super_block *sb, + struct fuse_inode_handle *handle) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct inode *inode; + struct dentry *entry; + int err = -ESTALE; + + if (handle->nodeid == 0) + goto out_err; + + inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); + if (!inode) { + struct fuse_entry_out outarg; + struct qstr name; + + if (!fc->export_support) + goto out_err; + + name.len = 1; + name.name = "."; + err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, + &inode); + if (err && err != -ENOENT) + goto out_err; + if (err || !inode) { + err = -ESTALE; + goto out_err; + } + err = -EIO; + if (get_node_id(inode) != handle->nodeid) + goto out_iput; + } + err = -ESTALE; + if (inode->i_generation != handle->generation) + goto out_iput; + + entry = d_alloc_anon(inode); + err = -ENOMEM; + if (!entry) + goto out_iput; + + if (get_node_id(inode) != FUSE_ROOT_ID) { + entry->d_op = &fuse_dentry_operations; + fuse_invalidate_entry_cache(entry); + } + + return entry; + + out_iput: + iput(inode); + out_err: + return ERR_PTR(err); +} + +static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, + int connectable) +{ + struct inode *inode = dentry->d_inode; + bool encode_parent = connectable && !S_ISDIR(inode->i_mode); + int len = encode_parent ? 6 : 3; + u64 nodeid; + u32 generation; + + if (*max_len < len) + return 255; + + nodeid = get_fuse_inode(inode)->nodeid; + generation = inode->i_generation; + + fh[0] = (u32)(nodeid >> 32); + fh[1] = (u32)(nodeid & 0xffffffff); + fh[2] = generation; + + if (encode_parent) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; + nodeid = get_fuse_inode(parent)->nodeid; + generation = parent->i_generation; + spin_unlock(&dentry->d_lock); + + fh[3] = (u32)(nodeid >> 32); + fh[4] = (u32)(nodeid & 0xffffffff); + fh[5] = generation; + } + + *max_len = len; + return encode_parent ? 0x82 : 0x81; +} + +static struct dentry *fuse_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + struct fuse_inode_handle handle; + + if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) + return NULL; + + handle.nodeid = (u64) fid->raw[0] << 32; + handle.nodeid |= (u64) fid->raw[1]; + handle.generation = fid->raw[2]; + return fuse_get_dentry(sb, &handle); +} + +static struct dentry *fuse_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + struct fuse_inode_handle parent; + + if (fh_type != 0x82 || fh_len < 6) + return NULL; + + parent.nodeid = (u64) fid->raw[3] << 32; + parent.nodeid |= (u64) fid->raw[4]; + parent.generation = fid->raw[5]; + return fuse_get_dentry(sb, &parent); +} + +static struct dentry *fuse_get_parent(struct dentry *child) +{ + struct inode *child_inode = child->d_inode; + struct fuse_conn *fc = get_fuse_conn(child_inode); + struct inode *inode; + struct dentry *parent; + struct fuse_entry_out outarg; + struct qstr name; + int err; + + if (!fc->export_support) + return ERR_PTR(-ESTALE); + + name.len = 2; + name.name = ".."; + err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), + &name, &outarg, &inode); + if (err && err != -ENOENT) + return ERR_PTR(err); + if (err || !inode) + return ERR_PTR(-ESTALE); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + if (get_node_id(inode) != FUSE_ROOT_ID) { + parent->d_op = &fuse_dentry_operations; + fuse_invalidate_entry_cache(parent); + } + + return parent; +} + +static const struct export_operations fuse_export_operations = { + .fh_to_dentry = fuse_fh_to_dentry, + .fh_to_parent = fuse_fh_to_parent, + .encode_fh = fuse_encode_fh, + .get_parent = fuse_get_parent, +}; + static const struct super_operations fuse_super_operations = { .alloc_inode = fuse_alloc_inode, .destroy_inode = fuse_destroy_inode, @@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_lock = 1; if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; + if (arg->minor >= 9) { + /* LOOKUP has dependency on proto version */ + if (arg->flags & FUSE_EXPORT_SUPPORT) + fc->export_support = 1; + } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; } else { @@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_export_op = &fuse_export_operations; file = fget(d.fd); if (!file) diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c index 24e75798ddf0..c6e97366e8ac 100644 --- a/fs/hfs/bitmap.c +++ b/fs/hfs/bitmap.c @@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits) if (!*num_bits) return 0; - down(&HFS_SB(sb)->bitmap_lock); + mutex_lock(&HFS_SB(sb)->bitmap_lock); bitmap = HFS_SB(sb)->bitmap; pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits); @@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits) HFS_SB(sb)->free_ablocks -= *num_bits; hfs_bitmap_dirty(sb); out: - up(&HFS_SB(sb)->bitmap_lock); + mutex_unlock(&HFS_SB(sb)->bitmap_lock); return pos; } @@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count) if ((start + count) > HFS_SB(sb)->fs_ablocks) return -2; - down(&HFS_SB(sb)->bitmap_lock); + mutex_lock(&HFS_SB(sb)->bitmap_lock); /* bitmap is always on a 32-bit boundary */ curr = HFS_SB(sb)->bitmap + (start / 32); len = count; @@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count) } out: HFS_SB(sb)->free_ablocks += len; - up(&HFS_SB(sb)->bitmap_lock); + mutex_unlock(&HFS_SB(sb)->bitmap_lock); hfs_bitmap_dirty(sb); return 0; diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index f6621a785202..9b9d6395bad3 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; HFS_I(tree->inode)->flags = 0; - init_MUTEX(&HFS_I(tree->inode)->extents_lock); + mutex_init(&HFS_I(tree->inode)->extents_lock); switch (id) { case HFS_EXT_CNID: hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index c176f67ba0a5..2c16316d2917 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block, goto done; } - down(&HFS_I(inode)->extents_lock); + mutex_lock(&HFS_I(inode)->extents_lock); res = hfs_ext_read_extent(inode, ablock); if (!res) dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents, ablock - HFS_I(inode)->cached_start); else { - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); return -EIO; } - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); done: map_bh(bh_result, sb, HFS_SB(sb)->fs_start + @@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode) u32 start, len, goal; int res; - down(&HFS_I(inode)->extents_lock); + mutex_lock(&HFS_I(inode)->extents_lock); if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) goal = hfs_ext_lastblock(HFS_I(inode)->first_extents); else { @@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode) goto insert_extent; } out: - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); if (!res) { HFS_I(inode)->alloc_blocks += len; mark_inode_dirty(inode); @@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode) if (blk_cnt == alloc_cnt) goto out; - down(&HFS_I(inode)->extents_lock); + mutex_lock(&HFS_I(inode)->extents_lock); hfs_find_init(HFS_SB(sb)->ext_tree, &fd); while (1) { if (alloc_cnt == HFS_I(inode)->first_blocks) { @@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode) hfs_brec_remove(&fd); } hfs_find_exit(&fd); - up(&HFS_I(inode)->extents_lock); + mutex_unlock(&HFS_I(inode)->extents_lock); HFS_I(inode)->alloc_blocks = blk_cnt; out: diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 147374b6f675..9955232fdf8c 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/types.h> +#include <linux/mutex.h> #include <linux/buffer_head.h> #include <linux/fs.h> @@ -53,7 +54,7 @@ struct hfs_inode_info { struct list_head open_dir_list; struct inode *rsrc_inode; - struct semaphore extents_lock; + struct mutex extents_lock; u16 alloc_blocks, clump_blocks; sector_t fs_blocks; @@ -139,7 +140,7 @@ struct hfs_sb_info { struct nls_table *nls_io, *nls_disk; - struct semaphore bitmap_lock; + struct mutex bitmap_lock; unsigned long flags; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 97f8446c4ff4..dc4ec640e875 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) if (!inode) return NULL; - init_MUTEX(&HFS_I(inode)->extents_lock); + mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); inode->i_ino = HFS_SB(sb)->next_id++; @@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data) HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; - init_MUTEX(&HFS_I(inode)->extents_lock); + mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); /* Initialize the inode */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 8cf67974adf6..ac2ec5ef66e4 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &hfs_super_operations; sb->s_flags |= MS_NODIRATIME; - init_MUTEX(&sbi->bitmap_lock); + mutex_init(&sbi->bitmap_lock); res = hfs_mdb_get(sb); if (res) { diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 12e899cd7886..fec8f61227ff 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, goto done; } - down(&HFSPLUS_I(inode).extents_lock); + mutex_lock(&HFSPLUS_I(inode).extents_lock); res = hfsplus_ext_read_extent(inode, ablock); if (!res) { dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - HFSPLUS_I(inode).cached_start); } else { - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); return -EIO; } - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); done: dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); @@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode) return -ENOSPC; } - down(&HFSPLUS_I(inode).extents_lock); + mutex_lock(&HFSPLUS_I(inode).extents_lock); if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); else { @@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode) goto insert_extent; } out: - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); if (!res) { HFSPLUS_I(inode).alloc_blocks += len; mark_inode_dirty(inode); @@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode) if (blk_cnt == alloc_cnt) goto out; - down(&HFSPLUS_I(inode).extents_lock); + mutex_lock(&HFSPLUS_I(inode).extents_lock); hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); while (1) { if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { @@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode) hfs_brec_remove(&fd); } hfs_find_exit(&fd); - up(&HFSPLUS_I(inode).extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); HFSPLUS_I(inode).alloc_blocks = blk_cnt; out: diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 9e59537b43d5..f027a905225f 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -11,6 +11,7 @@ #define _LINUX_HFSPLUS_FS_H #include <linux/fs.h> +#include <linux/mutex.h> #include <linux/buffer_head.h> #include "hfsplus_raw.h" @@ -154,7 +155,7 @@ struct hfsplus_sb_info { struct hfsplus_inode_info { - struct semaphore extents_lock; + struct mutex extents_lock; u32 clump_blocks, alloc_blocks; sector_t fs_blocks; /* Allocation extents from catalog record or volume header */ diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 67e1c8b467c4..cc3b5e24339b 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent inode->i_ino = dir->i_ino; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); - init_MUTEX(&HFSPLUS_I(inode).extents_lock); + mutex_init(&HFSPLUS_I(inode).extents_lock); HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); @@ -316,7 +316,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode) inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); - init_MUTEX(&HFSPLUS_I(inode).extents_lock); + mutex_init(&HFSPLUS_I(inode).extents_lock); atomic_set(&HFSPLUS_I(inode).opencnt, 0); HFSPLUS_I(inode).flags = 0; memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index ce97a54518d8..3859118531c7 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) return inode; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); - init_MUTEX(&HFSPLUS_I(inode).extents_lock); + mutex_init(&HFSPLUS_I(inode).extents_lock); HFSPLUS_I(inode).flags = 0; HFSPLUS_I(inode).rsrc_inode = NULL; atomic_set(&HFSPLUS_I(inode).opencnt, 0); diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 6bd48f0a7047..c2fb2dd0131f 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -209,6 +209,11 @@ repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; + /* + * Ignore rock ridge info if rr->len is out of range, but + * don't return -EIO because that would make the file + * invisible. + */ if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); @@ -216,8 +221,12 @@ repeat: goto eio; rs.chr += rr->len; rs.len -= rr->len; + /* + * As above, just ignore the rock ridge info if rr->len + * is bogus. + */ if (rs.len < 0) - goto eio; /* corrupted isofs */ + goto out; /* Something got screwed up here */ switch (sig) { case SIG('R', 'R'): @@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de, repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; + /* + * Ignore rock ridge info if rr->len is out of range, but + * don't return -EIO because that would make the file + * invisible. + */ if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); @@ -314,8 +328,12 @@ repeat: goto eio; rs.chr += rr->len; rs.len -= rr->len; + /* + * As above, just ignore the rock ridge info if rr->len + * is bogus. + */ if (rs.len < 0) - goto eio; /* corrupted isofs */ + goto out; /* Something got screwed up here */ switch (sig) { #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 5a8ca61498ca..2eccbfaa1d48 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) /* * When an ext3-ordered file is truncated, it is possible that many pages are - * not sucessfully freed, because they are attached to a committing transaction. + * not successfully freed, because they are attached to a committing transaction. * After the transaction commits, these pages are left on the LRU, with no * ->mapping, and with attached buffers. These pages are trivially reclaimable * by the VM, but their apparent absence upsets the VM accounting, and it makes @@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) * So here, we have a buffer which has just come off the forget list. Look to * see if we can strip all buffers from the backing page. * - * Called under lock_journal(), and possibly under journal_datalist_lock. The - * caller provided us with a ref against the buffer, and we drop that here. + * Called under journal->j_list_lock. The caller provided us with a ref + * against the buffer, and we drop that here. */ static void release_buffer_page(struct buffer_head *bh) { @@ -78,6 +78,19 @@ nope: } /* + * Decrement reference counter for data buffer. If it has been marked + * 'BH_Freed', release it and the page to which it belongs if possible. + */ +static void release_data_buffer(struct buffer_head *bh) +{ + if (buffer_freed(bh)) { + clear_buffer_freed(bh); + release_buffer_page(bh); + } else + put_bh(bh); +} + +/* * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is * held. For ranking reasons we must trylock. If we lose, schedule away and * return 0. j_list_lock is dropped in this case. @@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) /* * Submit all the data buffers to disk */ -static void journal_submit_data_buffers(journal_t *journal, +static int journal_submit_data_buffers(journal_t *journal, transaction_t *commit_transaction) { struct journal_head *jh; @@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal, int locked; int bufs = 0; struct buffer_head **wbuf = journal->j_wbuf; + int err = 0; /* * Whenever we unlock the journal and sleep, things can get added @@ -231,7 +245,7 @@ write_out_data: if (locked) unlock_buffer(bh); BUFFER_TRACE(bh, "already cleaned up"); - put_bh(bh); + release_data_buffer(bh); continue; } if (locked && test_clear_buffer_dirty(bh)) { @@ -253,15 +267,17 @@ write_out_data: put_bh(bh); } else { BUFFER_TRACE(bh, "writeout complete: unfile"); + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; __journal_unfile_buffer(jh); jbd_unlock_bh_state(bh); if (locked) unlock_buffer(bh); journal_remove_journal_head(bh); - /* Once for our safety reference, once for + /* One for our safety reference, other for * journal_remove_journal_head() */ put_bh(bh); - put_bh(bh); + release_data_buffer(bh); } if (need_resched() || spin_needbreak(&journal->j_list_lock)) { @@ -271,6 +287,8 @@ write_out_data: } spin_unlock(&journal->j_list_lock); journal_do_submit_data(wbuf, bufs); + + return err; } /* @@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal) * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ - err = 0; - journal_submit_data_buffers(journal, commit_transaction); + err = journal_submit_data_buffers(journal, commit_transaction); /* * Wait for all previously submitted IO to complete. @@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal) if (buffer_locked(bh)) { spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; spin_lock(&journal->j_list_lock); } + if (unlikely(!buffer_uptodate(bh))) { + if (TestSetPageLocked(bh->b_page)) { + spin_unlock(&journal->j_list_lock); + lock_page(bh->b_page); + spin_lock(&journal->j_list_lock); + } + if (bh->b_page->mapping) + set_bit(AS_EIO, &bh->b_page->mapping->flags); + + unlock_page(bh->b_page); + SetPageError(bh->b_page); + err = -EIO; + } if (!inverted_lock(journal, bh)) { put_bh(bh); spin_lock(&journal->j_list_lock); @@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal) } else { jbd_unlock_bh_state(bh); } - put_bh(bh); + release_data_buffer(bh); cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); - if (err) - journal_abort(journal, err); + if (err) { + char b[BDEVNAME_SIZE]; - journal_write_revoke_records(journal, commit_transaction); + printk(KERN_WARNING + "JBD: Detected IO errors while flushing file data " + "on %s\n", bdevname(journal->j_fs_dev, b)); + err = 0; + } - jbd_debug(3, "JBD: commit phase 2\n"); + journal_write_revoke_records(journal, commit_transaction); /* * If we found any dirty or locked buffers, then we should have diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b99c3b3654c4..aa7143a8349b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features); EXPORT_SYMBOL(journal_create); EXPORT_SYMBOL(journal_load); EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_update_superblock); EXPORT_SYMBOL(journal_abort); EXPORT_SYMBOL(journal_errno); EXPORT_SYMBOL(journal_ack_err); @@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void) static void journal_destroy_journal_head_cache(void) { - J_ASSERT(journal_head_cache != NULL); - kmem_cache_destroy(journal_head_cache); - journal_head_cache = NULL; + if (journal_head_cache) { + kmem_cache_destroy(journal_head_cache); + journal_head_cache = NULL; + } } /* diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 1bb43e987f4b..c7bd649bbbdc 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } +void journal_destroy_revoke_caches(void) +{ + if (revoke_record_cache) { + kmem_cache_destroy(revoke_record_cache); + revoke_record_cache = NULL; + } + if (revoke_table_cache) { + kmem_cache_destroy(revoke_table_cache); + revoke_table_cache = NULL; + } +} + int __init journal_init_revoke_caches(void) { + J_ASSERT(!revoke_record_cache); + J_ASSERT(!revoke_table_cache); + revoke_record_cache = kmem_cache_create("revoke_record", sizeof(struct jbd_revoke_record_s), 0, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL); if (!revoke_record_cache) - return -ENOMEM; + goto record_cache_failure; revoke_table_cache = kmem_cache_create("revoke_table", sizeof(struct jbd_revoke_table_s), 0, SLAB_TEMPORARY, NULL); - if (!revoke_table_cache) { - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; - return -ENOMEM; - } + if (!revoke_table_cache) + goto table_cache_failure; + return 0; -} -void journal_destroy_revoke_caches(void) -{ - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; - kmem_cache_destroy(revoke_table_cache); - revoke_table_cache = NULL; +table_cache_failure: + journal_destroy_revoke_caches(); +record_cache_failure: + return -ENOMEM; } -/* Initialise the revoke table for a given journal to a given size. */ - -int journal_init_revoke(journal_t *journal, int hash_size) +static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size) { - int shift, tmp; + int shift = 0; + int tmp = hash_size; + struct jbd_revoke_table_s *table; - J_ASSERT (journal->j_revoke_table[0] == NULL); + table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); + if (!table) + goto out; - shift = 0; - tmp = hash_size; while((tmp >>= 1UL) != 0UL) shift++; - journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[0]) - return -ENOMEM; - journal->j_revoke = journal->j_revoke_table[0]; - - /* Check that the hash_size is a power of two */ - J_ASSERT(is_power_of_2(hash_size)); - - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; - - journal->j_revoke->hash_table = + table->hash_size = hash_size; + table->hash_shift = shift; + table->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - journal->j_revoke = NULL; - return -ENOMEM; + if (!table->hash_table) { + kmem_cache_free(revoke_table_cache, table); + table = NULL; + goto out; } for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + INIT_LIST_HEAD(&table->hash_table[tmp]); - journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[1]) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - return -ENOMEM; +out: + return table; +} + +static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table) +{ + int i; + struct list_head *hash_list; + + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT(list_empty(hash_list)); } - journal->j_revoke = journal->j_revoke_table[1]; + kfree(table->hash_table); + kmem_cache_free(revoke_table_cache, table); +} - /* Check that the hash_size is a power of two */ +/* Initialise the revoke table for a given journal to a given size. */ +int journal_init_revoke(journal_t *journal, int hash_size) +{ + J_ASSERT(journal->j_revoke_table[0] == NULL); J_ASSERT(is_power_of_2(hash_size)); - journal->j_revoke->hash_size = hash_size; + journal->j_revoke_table[0] = journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[0]) + goto fail0; - journal->j_revoke->hash_shift = shift; + journal->j_revoke_table[1] = journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[1]) + goto fail1; - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]); - journal->j_revoke = NULL; - return -ENOMEM; - } - - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; -} -/* Destoy a journal's revoke table. The table must already be empty! */ +fail1: + journal_destroy_revoke_table(journal->j_revoke_table[0]); +fail0: + return -ENOMEM; +} +/* Destroy a journal's revoke table. The table must already be empty! */ void journal_destroy_revoke(journal_t *journal) { - struct jbd_revoke_table_s *table; - struct list_head *hash_list; - int i; - - table = journal->j_revoke_table[0]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); - journal->j_revoke = NULL; - - table = journal->j_revoke_table[1]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); journal->j_revoke = NULL; + if (journal->j_revoke_table[0]) + journal_destroy_revoke_table(journal->j_revoke_table[0]); + if (journal->j_revoke_table[1]) + journal_destroy_revoke_table(journal->j_revoke_table[1]); } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 67ff2024c23c..8dee32007500 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1648,12 +1648,42 @@ out: return; } +/* + * journal_try_to_free_buffers() could race with journal_commit_transaction() + * The latter might still hold the a count on buffers when inspecting + * them on t_syncdata_list or t_locked_list. + * + * journal_try_to_free_buffers() will call this function to + * wait for the current transaction to finish syncing data buffers, before + * tryinf to free that buffer. + * + * Called with journal->j_state_lock held. + */ +static void journal_wait_for_transaction_sync_data(journal_t *journal) +{ + transaction_t *transaction = NULL; + tid_t tid; + + spin_lock(&journal->j_state_lock); + transaction = journal->j_committing_transaction; + + if (!transaction) { + spin_unlock(&journal->j_state_lock); + return; + } + + tid = transaction->t_tid; + spin_unlock(&journal->j_state_lock); + log_wait_commit(journal, tid); +} /** * int journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free - * @unused_gfp_mask: unused + * @gfp_mask: we use the mask to detect how hard should we try to release + * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to + * release the buffers. * * * For all the buffers on this page, @@ -1682,9 +1712,11 @@ out: * journal_try_to_free_buffer() is changing its state. But that * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? + * + * Return 0 on failure, 1 on success */ int journal_try_to_free_buffers(journal_t *journal, - struct page *page, gfp_t unused_gfp_mask) + struct page *page, gfp_t gfp_mask) { struct buffer_head *head; struct buffer_head *bh; @@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal, if (buffer_jbd(bh)) goto busy; } while ((bh = bh->b_this_page) != head); + ret = try_to_free_buffers(page); + + /* + * There are a number of places where journal_try_to_free_buffers() + * could race with journal_commit_transaction(), the later still + * holds the reference to the buffers to free while processing them. + * try_to_free_buffers() failed to free those buffers. Some of the + * caller of releasepage() request page buffers to be dropped, otherwise + * treat the fail-to-free as errors (such as generic_file_direct_IO()) + * + * So, if the caller of try_to_release_page() wants the synchronous + * behaviour(i.e make sure buffers are dropped upon return), + * let's wait for the current transaction to finish flush of + * dirty data buffers, then try to free those buffers again, + * with the journal locked. + */ + if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { + journal_wait_for_transaction_sync_data(journal); + ret = try_to_free_buffers(page); + } + busy: return ret; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 0288e6d7936a..359c091d8965 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -22,6 +22,7 @@ #include <linux/parser.h> #include <linux/completion.h> #include <linux/vfs.h> +#include <linux/quotaops.h> #include <linux/mount.h> #include <linux/moduleparam.h> #include <linux/kthread.h> diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 1f6dc518505c..31668b690e03 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -582,7 +582,15 @@ again: } if (status < 0) goto out_unlock; - status = nlm_stat_to_errno(resp->status); + /* + * EAGAIN doesn't make sense for sleeping locks, and in some + * cases NLM_LCK_DENIED is returned for a permanent error. So + * turn it into an ENOLCK. + */ + if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP)) + status = -ENOLCK; + else + status = nlm_stat_to_errno(resp->status); out_unblock: nlmclnt_finish_block(block); out: diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 821b9acdfb66..cf0d5c2c318d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; case -EAGAIN: ret = nlm_lck_denied; - break; - case -EINPROGRESS: + goto out; + case FILE_LOCK_DEFERRED: if (wait) break; /* Filesystem lock operation is in progress @@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - ret = nlm_lck_denied; - if (!wait) - goto out; - ret = nlm_lck_blocked; /* Append to list of blocked */ @@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, } error = vfs_test_lock(file->f_file, &lock->fl); - if (error == -EINPROGRESS) { + if (error == FILE_LOCK_DEFERRED) { ret = nlmsvc_defer_lock_rqst(rqstp, block); goto out; } @@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) switch (error) { case 0: break; - case -EAGAIN: - case -EINPROGRESS: + case FILE_LOCK_DEFERRED: dprintk("lockd: lock still blocked error %d\n", error); nlmsvc_insert_block(block, NLM_NEVER); nlmsvc_release_block(block); diff --git a/fs/locks.c b/fs/locks.c index dce8c747371c..01490300f7cb 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -779,8 +779,10 @@ find_conflict: if (!flock_locks_conflict(request, fl)) continue; error = -EAGAIN; - if (request->fl_flags & FL_SLEEP) - locks_insert_block(fl, request); + if (!(request->fl_flags & FL_SLEEP)) + goto out; + error = FILE_LOCK_DEFERRED; + locks_insert_block(fl, request); goto out; } if (request->fl_flags & FL_ACCESS) @@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str error = -EDEADLK; if (posix_locks_deadlock(request, fl)) goto out; - error = -EAGAIN; + error = FILE_LOCK_DEFERRED; locks_insert_block(fl, request); goto out; } @@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) might_sleep (); for (;;) { error = posix_lock_file(filp, fl, NULL); - if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); if (!error) @@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, for (;;) { error = __posix_lock_file(inode, &fl, NULL); - if (error != -EAGAIN) - break; - if (!(fl.fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); if (!error) { @@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) might_sleep(); for (;;) { error = flock_lock_file(filp, fl); - if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); if (!error) @@ -1716,17 +1716,17 @@ out: * fl_grant is set. Callers expecting ->lock() to return asynchronously * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) * the request is for a blocking lock. When ->lock() does return asynchronously, - * it must return -EINPROGRESS, and call ->fl_grant() when the lock + * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock * request completes. * If the request is for non-blocking lock the file system should return - * -EINPROGRESS then try to get the lock and call the callback routine with - * the result. If the request timed out the callback routine will return a + * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine + * with the result. If the request timed out the callback routine will return a * nonzero return code and the file system should release the lock. The file * system is also responsible to keep a corresponding posix lock when it * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call - * ->fl_grant() before returning to the caller with a -EINPROGRESS + * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code. */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) @@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str } EXPORT_SYMBOL_GPL(vfs_lock_file); +static int do_lock_file_wait(struct file *filp, unsigned int cmd, + struct file_lock *fl) +{ + int error; + + error = security_file_lock(filp, fl->fl_type); + if (error) + return error; + + for (;;) { + error = vfs_lock_file(filp, cmd, fl, NULL); + if (error != FILE_LOCK_DEFERRED) + break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); + if (!error) + continue; + + locks_delete_block(fl); + break; + } + + return error; +} + /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ @@ -1795,26 +1819,7 @@ again: goto out; } - error = security_file_lock(filp, file_lock->fl_type); - if (error) - goto out; - - if (filp->f_op && filp->f_op->lock != NULL) - error = filp->f_op->lock(filp, cmd, file_lock); - else { - for (;;) { - error = posix_lock_file(filp, file_lock, NULL); - if (error != -EAGAIN || cmd == F_SETLK) - break; - error = wait_event_interruptible(file_lock->fl_wait, - !file_lock->fl_next); - if (!error) - continue; - - locks_delete_block(file_lock); - break; - } - } + error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by @@ -1932,26 +1937,7 @@ again: goto out; } - error = security_file_lock(filp, file_lock->fl_type); - if (error) - goto out; - - if (filp->f_op && filp->f_op->lock != NULL) - error = filp->f_op->lock(filp, cmd, file_lock); - else { - for (;;) { - error = posix_lock_file(filp, file_lock, NULL); - if (error != -EAGAIN || cmd == F_SETLK64) - break; - error = wait_event_interruptible(file_lock->fl_wait, - !file_lock->fl_next); - if (!error) - continue; - - locks_delete_block(file_lock); - break; - } - } + error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 84f6242ba6fc..523d73713418 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) if (!s->s_root) goto out_iput; - if (!NO_TRUNCATE) - s->s_root->d_op = &minix_dentry_operations; - if (!(s->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ ms->s_state &= ~MINIX_VALID_FS; diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 326edfe96108..e6a0b193bea4 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -2,11 +2,6 @@ #include <linux/pagemap.h> #include <linux/minix_fs.h> -/* - * change the define below to 0 if you want names > info->s_namelen chars to be - * truncated. Else they will be disallowed (ENAMETOOLONG). - */ -#define NO_TRUNCATE 1 #define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version #define MINIX_V1 0x0001 /* original minix fs */ #define MINIX_V2 0x0002 /* minix V2 fs */ @@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations; extern const struct inode_operations minix_dir_inode_operations; extern const struct file_operations minix_file_operations; extern const struct file_operations minix_dir_operations; -extern struct dentry_operations minix_dentry_operations; static inline struct minix_sb_info *minix_sb(struct super_block *sb) { diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 102241bc9c79..32b131cd6121 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static int minix_hash(struct dentry *dentry, struct qstr *qstr) -{ - unsigned long hash; - int i; - const unsigned char *name; - - i = minix_sb(dentry->d_inode->i_sb)->s_namelen; - if (i >= qstr->len) - return 0; - /* Truncate the name in place, avoids having to define a compare - function. */ - qstr->len = i; - name = qstr->name; - hash = init_name_hash(); - while (i--) - hash = partial_name_hash(*name++, hash); - qstr->hash = end_name_hash(hash); - return 0; -} - -struct dentry_operations minix_dentry_operations = { - .d_hash = minix_hash, -}; - static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode = NULL; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 1f7f2956412a..e844b9809d27 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -14,12 +14,7 @@ /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; -static unsigned char bad_if_strict_pc[] = "+=,; "; -/* GEMDOS is less restrictive */ -static unsigned char bad_if_strict_atari[] = " "; - -#define bad_if_strict(opts) \ - ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc) +static unsigned char bad_if_strict[] = "+=,; "; /***** Formats an MS-DOS file name. Rejects invalid names. */ static int msdos_format_name(const unsigned char *name, int len, @@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len, /* Get rid of dot - test for it elsewhere */ name++; len--; - } else if (!opts->atari) + } else return -EINVAL; } /* - * disallow names that _really_ start with a dot for MS-DOS, - * GEMDOS does not care + * disallow names that _really_ start with a dot */ - space = !opts->atari; + space = 1; c = 0; for (walk = res; len && walk - res < 8; walk++) { c = *name++; len--; if (opts->name_check != 'r' && strchr(bad_chars, c)) return -EINVAL; - if (opts->name_check == 's' && strchr(bad_if_strict(opts), c)) + if (opts->name_check == 's' && strchr(bad_if_strict, c)) return -EINVAL; if (c >= 'A' && c <= 'Z' && opts->name_check == 's') return -EINVAL; @@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len, if (opts->name_check != 'r' && strchr(bad_chars, c)) return -EINVAL; if (opts->name_check == 's' && - strchr(bad_if_strict(opts), c)) + strchr(bad_if_strict, c)) return -EINVAL; if (c < ' ' || c == ':' || c == '\\') return -EINVAL; @@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, int is_dir, int is_hid, int cluster, struct timespec *ts, struct fat_slot_info *sinfo) { + struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); struct msdos_dir_entry de; __le16 time, date; int err; @@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 6b6225ac4926..15c6faeec77c 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -19,6 +19,13 @@ #define NFSDDBG_FACILITY NFSDDBG_LOCKD +#ifdef CONFIG_LOCKD_V4 +#define nlm_stale_fh nlm4_stale_fh +#define nlm_failed nlm4_failed +#else +#define nlm_stale_fh nlm_lck_denied_nolocks +#define nlm_failed nlm_lck_denied_nolocks +#endif /* * Note: we hold the dentry use count while the file is open. */ @@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) return 0; case nfserr_dropit: return nlm_drop_reply; -#ifdef CONFIG_LOCKD_V4 case nfserr_stale: - return nlm4_stale_fh; -#endif + return nlm_stale_fh; default: - return nlm_lck_denied; + return nlm_failed; } } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index efef715135d3..7d6b34e201db 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); -void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) +int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) { struct hd_struct *p; int err; p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) - return; + return -ENOMEM; if (!init_part_stats(p)) { - kfree(p); - return; + err = -ENOMEM; + goto out0; } p->start_sect = start; p->nr_sects = len; @@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, /* delay uevent until 'holders' subdir is created */ p->dev.uevent_suppress = 1; - device_add(&p->dev); + err = device_add(&p->dev); + if (err) + goto out1; partition_sysfs_add_subdir(p); p->dev.uevent_suppress = 0; - if (flags & ADDPART_FLAG_WHOLEDISK) + if (flags & ADDPART_FLAG_WHOLEDISK) { err = device_create_file(&p->dev, &dev_attr_whole_disk); + if (err) + goto out2; + } /* suppress uevent if the disk supresses it */ if (!disk->dev.uevent_suppress) kobject_uevent(&p->dev.kobj, KOBJ_ADD); + + return 0; + +out2: + device_del(&p->dev); +out1: + put_device(&p->dev); + free_part_stats(p); +out0: + kfree(p); + return err; } /* Not exported, helper to add_disk(). */ @@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (!size) continue; if (from + size > get_capacity(disk)) { - printk(" %s: p%d exceeds device capacity\n", + printk(KERN_ERR " %s: p%d exceeds device capacity\n", disk->disk_name, p); + continue; + } + res = add_partition(disk, p, from, size, state->parts[p].flags); + if (res) { + printk(KERN_ERR " %s: p%d could not be added: %d\n", + disk->disk_name, p, -res); + continue; } - add_partition(disk, p, from, size, state->parts[p].flags); #ifdef CONFIG_BLK_DEV_MD if (state->parts[p].flags & ADDPART_FLAG_RAID) md_autodetect_dev(bdev->bd_dev+p); diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index e7b07006bc41..038a6022152f 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -95,13 +95,6 @@ #include "check.h" #include "efi.h" -#undef EFI_DEBUG -#ifdef EFI_DEBUG -#define Dprintk(x...) printk(KERN_DEBUG x) -#else -#define Dprintk(x...) -#endif - /* This allows a kernel command line option 'gpt' to override * the test for invalid PMBR. Not __initdata because reloading * the partition tables happens after init too. @@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba, /* Check the GUID Partition Table signature */ if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { - Dprintk("GUID Partition Table Header signature is wrong:" - "%lld != %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->signature), - (unsigned long long)GPT_HEADER_SIGNATURE); + pr_debug("GUID Partition Table Header signature is wrong:" + "%lld != %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->signature), + (unsigned long long)GPT_HEADER_SIGNATURE); goto fail; } @@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba, crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size)); if (crc != origcrc) { - Dprintk - ("GUID Partition Table Header CRC is wrong: %x != %x\n", - crc, origcrc); + pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n", + crc, origcrc); goto fail; } (*gpt)->header_crc32 = cpu_to_le32(origcrc); @@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba, /* Check that the my_lba entry points to the LBA that contains * the GUID Partition Table */ if (le64_to_cpu((*gpt)->my_lba) != lba) { - Dprintk("GPT my_lba incorrect: %lld != %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->my_lba), - (unsigned long long)lba); + pr_debug("GPT my_lba incorrect: %lld != %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->my_lba), + (unsigned long long)lba); goto fail; } @@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba, */ lastlba = last_lba(bdev); if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { - Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), - (unsigned long long)lastlba); + pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), + (unsigned long long)lastlba); goto fail; } if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) { - Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n", - (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), - (unsigned long long)lastlba); + pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), + (unsigned long long)lastlba); goto fail; } @@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, le32_to_cpu((*gpt)->sizeof_partition_entry)); if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { - Dprintk("GUID Partitition Entry Array CRC check failed.\n"); + pr_debug("GUID Partitition Entry Array CRC check failed.\n"); goto fail_ptes; } @@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) return 0; } - Dprintk("GUID Partition Table is valid! Yea!\n"); + pr_debug("GUID Partition Table is valid! Yea!\n"); for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { if (!is_pte_valid(&ptes[i], last_lba(bdev))) diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 0fdda2e8a4cc..8652fb99e962 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph) bool is_vista = false; BUG_ON(!data || !ph); - if (MAGIC_PRIVHEAD != BE64(data)) { + if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) { ldm_error("Cannot find PRIVHEAD structure. LDM database is" " corrupt. Aborting."); return false; } - ph->ver_major = BE16(data + 0x000C); - ph->ver_minor = BE16(data + 0x000E); - ph->logical_disk_start = BE64(data + 0x011B); - ph->logical_disk_size = BE64(data + 0x0123); - ph->config_start = BE64(data + 0x012B); - ph->config_size = BE64(data + 0x0133); + ph->ver_major = get_unaligned_be16(data + 0x000C); + ph->ver_minor = get_unaligned_be16(data + 0x000E); + ph->logical_disk_start = get_unaligned_be64(data + 0x011B); + ph->logical_disk_size = get_unaligned_be64(data + 0x0123); + ph->config_start = get_unaligned_be64(data + 0x012B); + ph->config_size = get_unaligned_be64(data + 0x0133); /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ if (ph->ver_major == 2 && ph->ver_minor == 12) is_vista = true; @@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc) { BUG_ON (!data || !toc); - if (MAGIC_TOCBLOCK != BE64 (data)) { + if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) { ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); return false; } strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; - toc->bitmap1_start = BE64 (data + 0x2E); - toc->bitmap1_size = BE64 (data + 0x36); + toc->bitmap1_start = get_unaligned_be64(data + 0x2E); + toc->bitmap1_size = get_unaligned_be64(data + 0x36); if (strncmp (toc->bitmap1_name, TOC_BITMAP1, sizeof (toc->bitmap1_name)) != 0) { @@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc) } strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; - toc->bitmap2_start = BE64 (data + 0x50); - toc->bitmap2_size = BE64 (data + 0x58); + toc->bitmap2_start = get_unaligned_be64(data + 0x50); + toc->bitmap2_size = get_unaligned_be64(data + 0x58); if (strncmp (toc->bitmap2_name, TOC_BITMAP2, sizeof (toc->bitmap2_name)) != 0) { ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", @@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm) { BUG_ON (!data || !vm); - if (MAGIC_VMDB != BE32 (data)) { + if (MAGIC_VMDB != get_unaligned_be32(data)) { ldm_crit ("Cannot find the VMDB, database may be corrupt."); return false; } - vm->ver_major = BE16 (data + 0x12); - vm->ver_minor = BE16 (data + 0x14); + vm->ver_major = get_unaligned_be16(data + 0x12); + vm->ver_minor = get_unaligned_be16(data + 0x14); if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { ldm_error ("Expected VMDB version %d.%d, got %d.%d. " "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); return false; } - vm->vblk_size = BE32 (data + 0x08); - vm->vblk_offset = BE32 (data + 0x0C); - vm->last_vblk_seq = BE32 (data + 0x04); + vm->vblk_size = get_unaligned_be32(data + 0x08); + vm->vblk_offset = get_unaligned_be32(data + 0x0C); + vm->last_vblk_seq = get_unaligned_be32(data + 0x04); ldm_debug ("Parsed VMDB successfully."); return true; @@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, goto out; /* Already logged */ /* Are there uncommitted transactions? */ - if (BE16(data + 0x10) != 0x01) { + if (get_unaligned_be16(data + 0x10) != 0x01) { ldm_crit ("Database is not in a consistent state. Aborting."); goto out; } @@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_CMP3; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; comp = &vb->vblk.comp; @@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DGR3; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; dgrp = &vb->vblk.dgrp; @@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DGR4; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; dgrp = &vb->vblk.dgrp; @@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DSK3; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; disk = &vb->vblk.disk; @@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) return false; len += VBLK_SIZE_DSK4; - if (len != BE32 (buffer + 0x14)) + if (len != get_unaligned_be32(buffer + 0x14)) return false; disk = &vb->vblk.disk; @@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) return false; } len += VBLK_SIZE_PRT3; - if (len > BE32(buffer + 0x14)) { + if (len > get_unaligned_be32(buffer + 0x14)) { ldm_error("len %d > BE32(buffer + 0x14) %d", len, - BE32(buffer + 0x14)); + get_unaligned_be32(buffer + 0x14)); return false; } part = &vb->vblk.part; - part->start = BE64(buffer + 0x24 + r_name); - part->volume_offset = BE64(buffer + 0x2C + r_name); + part->start = get_unaligned_be64(buffer + 0x24 + r_name); + part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name); part->size = ldm_get_vnum(buffer + 0x34 + r_name); part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); @@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb) return false; } len += VBLK_SIZE_VOL5; - if (len > BE32(buffer + 0x14)) { + if (len > get_unaligned_be32(buffer + 0x14)) { ldm_error("len %d > BE32(buffer + 0x14) %d", len, - BE32(buffer + 0x14)); + get_unaligned_be32(buffer + 0x14)); return false; } volu = &vb->vblk.volu; @@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) BUG_ON (!data || !frags); - group = BE32 (data + 0x08); - rec = BE16 (data + 0x0C); - num = BE16 (data + 0x0E); + group = get_unaligned_be32(data + 0x08); + rec = get_unaligned_be16(data + 0x0C); + num = get_unaligned_be16(data + 0x0E); if ((num < 1) || (num > 4)) { ldm_error ("A VBLK claims to have %d parts.", num); return false; @@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, } for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ - if (MAGIC_VBLK != BE32 (data)) { + if (MAGIC_VBLK != get_unaligned_be32(data)) { ldm_error ("Expected to find a VBLK."); goto out; } - recs = BE16 (data + 0x0E); /* Number of records */ + recs = get_unaligned_be16(data + 0x0E); /* Number of records */ if (recs == 1) { if (!ldm_ldmdb_add (data, size, ldb)) goto out; /* Already logged */ diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index 80f63b5fdd9f..30e08e809c1d 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h @@ -98,11 +98,6 @@ struct parsed_partitions; #define TOC_BITMAP1 "config" /* Names of the two defined */ #define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ -/* Most numbers we deal with are big-endian and won't be aligned. */ -#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x)))) -#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x)))) -#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x)))) - /* Borrowed from msdos.c */ #define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig new file mode 100644 index 000000000000..73cd7a418f06 --- /dev/null +++ b/fs/proc/Kconfig @@ -0,0 +1,59 @@ +config PROC_FS + bool "/proc file system support" if EMBEDDED + default y + help + This is a virtual file system providing information about the status + of the system. "Virtual" means that it doesn't take up any space on + your hard disk: the files are created on the fly by the kernel when + you try to access them. Also, you cannot read the files with older + version of the program less: you need to use more or cat. + + It's totally cool; for example, "cat /proc/interrupts" gives + information about what the different IRQs are used for at the moment + (there is a small number of Interrupt ReQuest lines in your computer + that are used by the attached devices to gain the CPU's attention -- + often a source of trouble if two devices are mistakenly configured + to use the same IRQ). The program procinfo to display some + information about your system gathered from the /proc file system. + + Before you can use the /proc file system, it has to be mounted, + meaning it has to be given a location in the directory hierarchy. + That location should be /proc. A command such as "mount -t proc proc + /proc" or the equivalent line in /etc/fstab does the job. + + The /proc file system is explained in the file + <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage + ("man 5 proc"). + + This option will enlarge your kernel by about 67 KB. Several + programs depend on this, so everyone should say Y here. + +config PROC_KCORE + bool "/proc/kcore support" if !ARM + depends on PROC_FS && MMU + +config PROC_VMCORE + bool "/proc/vmcore support (EXPERIMENTAL)" + depends on PROC_FS && CRASH_DUMP + default y + help + Exports the dump image of crashed kernel in ELF format. + +config PROC_SYSCTL + bool "Sysctl support (/proc/sys)" if EMBEDDED + depends on PROC_FS + select SYSCTL + default y + ---help--- + The sysctl interface provides a means of dynamically changing + certain kernel parameters and variables on the fly without requiring + a recompile of the kernel or reboot of the system. The primary + interface is through /proc/sys. If you say Y here a tree of + modifiable sysctl entries will be generated beneath the + /proc/sys directory. They are explained in the files + in <file:Documentation/sysctl/>. Note that enabling this + option will enlarge the kernel by at least 8 KB. + + As it is generally a good thing, you should say Y here unless + building a kernel for install/rescue disks or your system is very + limited in memory. diff --git a/fs/proc/base.c b/fs/proc/base.c index 58c3e6a8e15e..a891fe4cb43b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, } #ifdef CONFIG_TASK_IO_ACCOUNTING -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) -{ +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) +{ + u64 rchar, wchar, syscr, syscw; + struct task_io_accounting ioac; + + if (!whole) { + rchar = task->rchar; + wchar = task->wchar; + syscr = task->syscr; + syscw = task->syscw; + memcpy(&ioac, &task->ioac, sizeof(ioac)); + } else { + unsigned long flags; + struct task_struct *t = task; + rchar = wchar = syscr = syscw = 0; + memset(&ioac, 0, sizeof(ioac)); + + rcu_read_lock(); + do { + rchar += t->rchar; + wchar += t->wchar; + syscr += t->syscr; + syscw += t->syscw; + + ioac.read_bytes += t->ioac.read_bytes; + ioac.write_bytes += t->ioac.write_bytes; + ioac.cancelled_write_bytes += + t->ioac.cancelled_write_bytes; + t = next_thread(t); + } while (t != task); + rcu_read_unlock(); + + if (lock_task_sighand(task, &flags)) { + struct signal_struct *sig = task->signal; + + rchar += sig->rchar; + wchar += sig->wchar; + syscr += sig->syscr; + syscw += sig->syscw; + + ioac.read_bytes += sig->ioac.read_bytes; + ioac.write_bytes += sig->ioac.write_bytes; + ioac.cancelled_write_bytes += + sig->ioac.cancelled_write_bytes; + + unlock_task_sighand(task, &flags); + } + } + return sprintf(buffer, -#ifdef CONFIG_TASK_XACCT "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" -#endif "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", -#ifdef CONFIG_TASK_XACCT - (unsigned long long)task->rchar, - (unsigned long long)task->wchar, - (unsigned long long)task->syscr, - (unsigned long long)task->syscw, -#endif - (unsigned long long)task->ioac.read_bytes, - (unsigned long long)task->ioac.write_bytes, - (unsigned long long)task->ioac.cancelled_write_bytes); + (unsigned long long)rchar, + (unsigned long long)wchar, + (unsigned long long)syscr, + (unsigned long long)syscw, + (unsigned long long)ioac.read_bytes, + (unsigned long long)ioac.write_bytes, + (unsigned long long)ioac.cancelled_write_bytes); +} + +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 0); } -#endif + +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 1); +} +#endif /* CONFIG_TASK_IO_ACCOUNTING */ /* * Thread groups @@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, pid_io_accounting), + INF("io", S_IRUGO, tgid_io_accounting), #endif }; @@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, tid_io_accounting), +#endif }; static int proc_tid_base_readdir(struct file * filp, diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 43e54e86cefd..bc0a0dd2d844 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->pde_users = 0; spin_lock_init(&ent->pde_unload_lock); ent->pde_unload_completion = NULL; + INIT_LIST_HEAD(&ent->pde_openers); out: return ent; } @@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) spin_unlock(&de->pde_unload_lock); continue_removing: + spin_lock(&de->pde_unload_lock); + while (!list_empty(&de->pde_openers)) { + struct pde_opener *pdeo; + + pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); + list_del(&pdeo->lh); + spin_unlock(&de->pde_unload_lock); + pdeo->release(pdeo->inode, pdeo->file); + kfree(pdeo); + spin_lock(&de->pde_unload_lock); + } + spin_unlock(&de->pde_unload_lock); + if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b08d10017911..02eca2ed9dd7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -111,27 +111,25 @@ int __init proc_init_inodecache(void) return 0; } -static int proc_remount(struct super_block *sb, int *flags, char *data) -{ - *flags |= MS_NODIRATIME; - return 0; -} - static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .delete_inode = proc_delete_inode, .statfs = simple_statfs, - .remount_fs = proc_remount, }; -static void pde_users_dec(struct proc_dir_entry *pde) +static void __pde_users_dec(struct proc_dir_entry *pde) { - spin_lock(&pde->pde_unload_lock); pde->pde_users--; if (pde->pde_unload_completion && pde->pde_users == 0) complete(pde->pde_unload_completion); +} + +static void pde_users_dec(struct proc_dir_entry *pde) +{ + spin_lock(&pde->pde_unload_lock); + __pde_users_dec(pde); spin_unlock(&pde->pde_unload_lock); } @@ -318,36 +316,97 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*open)(struct inode *, struct file *); + int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; + + /* + * What for, you ask? Well, we can have open, rmmod, remove_proc_entry + * sequence. ->release won't be called because ->proc_fops will be + * cleared. Depending on complexity of ->release, consequences vary. + * + * We can't wait for mercy when close will be done for real, it's + * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release + * by hand in remove_proc_entry(). For this, save opener's credentials + * for later. + */ + pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); + if (!pdeo) + return -ENOMEM; spin_lock(&pde->pde_unload_lock); if (!pde->proc_fops) { spin_unlock(&pde->pde_unload_lock); + kfree(pdeo); return rv; } pde->pde_users++; open = pde->proc_fops->open; + release = pde->proc_fops->release; spin_unlock(&pde->pde_unload_lock); if (open) rv = open(inode, file); - pde_users_dec(pde); + spin_lock(&pde->pde_unload_lock); + if (rv == 0 && release) { + /* To know what to release. */ + pdeo->inode = inode; + pdeo->file = file; + /* Strictly for "too late" ->release in proc_reg_release(). */ + pdeo->release = release; + list_add(&pdeo->lh, &pde->pde_openers); + } else + kfree(pdeo); + __pde_users_dec(pde); + spin_unlock(&pde->pde_unload_lock); return rv; } +static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, + struct inode *inode, struct file *file) +{ + struct pde_opener *pdeo; + + list_for_each_entry(pdeo, &pde->pde_openers, lh) { + if (pdeo->inode == inode && pdeo->file == file) + return pdeo; + } + return NULL; +} + static int proc_reg_release(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; spin_lock(&pde->pde_unload_lock); + pdeo = find_pde_opener(pde, inode, file); if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); + /* + * Can't simply exit, __fput() will think that everything is OK, + * and move on to freeing struct file. remove_proc_entry() will + * find slacker in opener's list and will try to do non-trivial + * things with struct file. Therefore, remove opener from list. + * + * But if opener is removed from list, who will ->release it? + */ + if (pdeo) { + list_del(&pdeo->lh); + spin_unlock(&pde->pde_unload_lock); + rv = pdeo->release(inode, file); + kfree(pdeo); + } else + spin_unlock(&pde->pde_unload_lock); return rv; } pde->pde_users++; release = pde->proc_fops->release; + if (pdeo) { + list_del(&pdeo->lh); + kfree(pdeo); + } spin_unlock(&pde->pde_unload_lock); if (release) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 28cbca805905..442202314d53 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; +extern const struct file_operations proc_kmsg_operations; extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); @@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir); + +struct pde_opener { + struct inode *inode; + struct file *file; + int (*release)(struct inode *, struct file *); + struct list_head lh; +}; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e78c81fcf547..c2370c76fb71 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -23,6 +23,10 @@ #define CORE_STR "CORE" +#ifndef ELF_CORE_EFLAGS +#define ELF_CORE_EFLAGS 0 +#endif + static int open_kcore(struct inode * inode, struct file * filp) { return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; @@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); elf->e_shoff = 0; -#if defined(CONFIG_H8300) - elf->e_flags = ELF_FLAGS; -#else - elf->e_flags = 0; -#endif + elf->e_flags = ELF_CORE_EFLAGS; elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize= sizeof(struct elf_phdr); elf->e_phnum = nphdr; diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index ff3b90b56e9d..9fd5df3f40ce 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -15,6 +15,8 @@ #include <asm/uaccess.h> #include <asm/io.h> +#include "internal.h" + extern wait_queue_head_t log_wait; extern int do_syslog(int type, char __user *bug, int count); diff --git a/fs/quota.c b/fs/quota.c index db1cc9f3c7aa..7f4386ebc23a 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type) void sync_dquots(struct super_block *sb, int type) { - int cnt, dirty; + int cnt; if (sb) { if (sb->s_qcop->quota_sync) @@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type) restart: list_for_each_entry(sb, &super_blocks, s_list) { /* This test just improves performance so it needn't be reliable... */ - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && type != cnt) + continue; + if (!sb_has_quota_enabled(sb, cnt)) + continue; + if (!info_dirty(&sb_dqopt(sb)->info[cnt]) && + list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list)) + continue; + break; + } + if (cnt == MAXQUOTAS) continue; sb->s_count++; spin_unlock(&sb_lock); diff --git a/fs/quota_v1.c b/fs/quota_v1.c index a6cf9269105c..5ae15b13eeb0 100644 --- a/fs/quota_v1.c +++ b/fs/quota_v1.c @@ -1,6 +1,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/quota.h> +#include <linux/quotaops.h> #include <linux/dqblk_v1.h> #include <linux/quotaio_v1.h> #include <linux/kernel.h> diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 234ada903633..b53827dc02d9 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/quotaops.h> #include <asm/byteorder.h> diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e396b2fa4743..c8f60ee183b5 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -34,15 +34,10 @@ ** from within kupdate, it will ignore the immediate flag */ -#include <asm/uaccess.h> -#include <asm/system.h> - #include <linux/time.h> #include <linux/semaphore.h> - #include <linux/vmalloc.h> #include <linux/reiserfs_fs.h> - #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fcntl.h> @@ -54,6 +49,9 @@ #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/uaccess.h> + +#include <asm/system.h> /* gets a struct reiserfs_journal_list * from a list head */ #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ @@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, static inline void lock_journal(struct super_block *p_s_sb) { PROC_INFO_INC(p_s_sb, journal.lock_journal); - down(&SB_JOURNAL(p_s_sb)->j_lock); + mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); } /* unlock the current transaction */ static inline void unlock_journal(struct super_block *p_s_sb) { - up(&SB_JOURNAL(p_s_sb)->j_lock); + mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); } static inline void get_journal_list(struct reiserfs_journal_list *jl) @@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s, } /* make sure nobody is trying to flush this one at the same time */ - down(&jl->j_commit_lock); + mutex_lock(&jl->j_commit_mutex); if (!journal_list_still_alive(s, trans_id)) { - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); goto put_jl; } BUG_ON(jl->j_trans_id == 0); @@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s, if (flushall) { atomic_set(&(jl->j_older_commits_done), 1); } - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); goto put_jl; } @@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s, if (flushall) { atomic_set(&(jl->j_older_commits_done), 1); } - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); put_jl: put_journal_list(s, jl); @@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s, /* if flushall == 0, the lock is already held */ if (flushall) { - down(&journal->j_flush_sem); - } else if (!down_trylock(&journal->j_flush_sem)) { + mutex_lock(&journal->j_flush_mutex); + } else if (mutex_trylock(&journal->j_flush_mutex)) { BUG(); } @@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s, jl->j_state = 0; put_journal_list(s, jl); if (flushall) - up(&journal->j_flush_sem); + mutex_unlock(&journal->j_flush_mutex); put_fs_excl(); return err; } @@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s, struct reiserfs_journal *journal = SB_JOURNAL(s); chunk.nr = 0; - down(&journal->j_flush_sem); + mutex_lock(&journal->j_flush_mutex); if (!journal_list_still_alive(s, orig_trans_id)) { goto done; } - /* we've got j_flush_sem held, nobody is going to delete any + /* we've got j_flush_mutex held, nobody is going to delete any * of these lists out from underneath us */ while ((num_trans && transactions_flushed < num_trans) || @@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s, } done: - up(&journal->j_flush_sem); + mutex_unlock(&journal->j_flush_mutex); return ret; } @@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) INIT_LIST_HEAD(&jl->j_working_list); INIT_LIST_HEAD(&jl->j_tail_bh_list); INIT_LIST_HEAD(&jl->j_bh_list); - sema_init(&jl->j_commit_lock, 1); + mutex_init(&jl->j_commit_mutex); SB_JOURNAL(s)->j_num_lists++; get_journal_list(jl); return jl; @@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name, journal->j_last = NULL; journal->j_first = NULL; init_waitqueue_head(&(journal->j_join_wait)); - sema_init(&journal->j_lock, 1); - sema_init(&journal->j_flush_sem, 1); + mutex_init(&journal->j_mutex); + mutex_init(&journal->j_flush_mutex); journal->j_trans_id = 10; journal->j_mount_id = 10; @@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, * the new transaction is fully setup, and we've already flushed the * ordered bh list */ - down(&jl->j_commit_lock); + mutex_lock(&jl->j_commit_mutex); /* save the transaction id in case we need to commit it later */ commit_trans_id = jl->j_trans_id; @@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, lock_kernel(); } BUG_ON(!list_empty(&jl->j_tail_bh_list)); - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); /* honor the flush wishes from the caller, simple commits can ** be done outside the journal lock, they are done below diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1d40f2bd1970..2ec748ba0bd3 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -22,6 +22,7 @@ #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> +#include <linux/quotaops.h> #include <linux/vfs.h> #include <linux/mnt_namespace.h> #include <linux/mount.h> @@ -182,7 +183,7 @@ static int finish_unfinished(struct super_block *s) int ret = reiserfs_quota_on_mount(s, i); if (ret < 0) reiserfs_warning(s, - "reiserfs: cannot turn on journalled quota: error %d", + "reiserfs: cannot turn on journaled quota: error %d", ret); } } @@ -876,7 +877,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin mount options were selected. */ unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ char **jdev_name, - unsigned int *commit_max_age) + unsigned int *commit_max_age, + char **qf_names, + unsigned int *qfmt) { int c; char *arg = NULL; @@ -992,9 +995,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin if (c == 'u' || c == 'g') { int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; - if (sb_any_quota_enabled(s)) { + if ((sb_any_quota_enabled(s) || + sb_any_quota_suspended(s)) && + (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { reiserfs_warning(s, - "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); + "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); return 0; } if (*arg) { /* Some filename specified? */ @@ -1011,46 +1016,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin "reiserfs_parse_options: quotafile must be on filesystem root."); return 0; } - REISERFS_SB(s)->s_qf_names[qtype] = + qf_names[qtype] = kmalloc(strlen(arg) + 1, GFP_KERNEL); - if (!REISERFS_SB(s)->s_qf_names[qtype]) { + if (!qf_names[qtype]) { reiserfs_warning(s, "reiserfs_parse_options: not enough memory for storing quotafile name."); return 0; } - strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); + strcpy(qf_names[qtype], arg); *mount_options |= 1 << REISERFS_QUOTA; } else { - kfree(REISERFS_SB(s)->s_qf_names[qtype]); - REISERFS_SB(s)->s_qf_names[qtype] = NULL; + if (qf_names[qtype] != + REISERFS_SB(s)->s_qf_names[qtype]) + kfree(qf_names[qtype]); + qf_names[qtype] = NULL; } } if (c == 'f') { if (!strcmp(arg, "vfsold")) - REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; + *qfmt = QFMT_VFS_OLD; else if (!strcmp(arg, "vfsv0")) - REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; + *qfmt = QFMT_VFS_V0; else { reiserfs_warning(s, "reiserfs_parse_options: unknown quota format specified."); return 0; } + if ((sb_any_quota_enabled(s) || + sb_any_quota_suspended(s)) && + *qfmt != REISERFS_SB(s)->s_jquota_fmt) { + reiserfs_warning(s, + "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); + return 0; + } } #else if (c == 'u' || c == 'g' || c == 'f') { reiserfs_warning(s, - "reiserfs_parse_options: journalled quota options not supported."); + "reiserfs_parse_options: journaled quota options not supported."); return 0; } #endif } #ifdef CONFIG_QUOTA - if (!REISERFS_SB(s)->s_jquota_fmt - && (REISERFS_SB(s)->s_qf_names[USRQUOTA] - || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) { + if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt + && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { reiserfs_warning(s, - "reiserfs_parse_options: journalled quota format not specified."); + "reiserfs_parse_options: journaled quota format not specified."); return 0; } /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ @@ -1130,6 +1143,21 @@ static void handle_attrs(struct super_block *s) } } +#ifdef CONFIG_QUOTA +static void handle_quota_files(struct super_block *s, char **qf_names, + unsigned int *qfmt) +{ + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) + kfree(REISERFS_SB(s)->s_qf_names[i]); + REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; + } + REISERFS_SB(s)->s_jquota_fmt = *qfmt; +} +#endif + static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) { struct reiserfs_super_block *rs; @@ -1141,23 +1169,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) struct reiserfs_journal *journal = SB_JOURNAL(s); char *new_opts = kstrdup(arg, GFP_KERNEL); int err; + char *qf_names[MAXQUOTAS]; + unsigned int qfmt = 0; #ifdef CONFIG_QUOTA int i; + + memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); #endif rs = SB_DISK_SUPER_BLOCK(s); if (!reiserfs_parse_options - (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { + (s, arg, &mount_options, &blocks, NULL, &commit_max_age, + qf_names, &qfmt)) { #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) { - kfree(REISERFS_SB(s)->s_qf_names[i]); - REISERFS_SB(s)->s_qf_names[i] = NULL; - } + for (i = 0; i < MAXQUOTAS; i++) + if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) + kfree(qf_names[i]); #endif err = -EINVAL; goto out_err; } +#ifdef CONFIG_QUOTA + handle_quota_files(s, qf_names, &qfmt); +#endif handle_attrs(s); @@ -1570,6 +1605,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) char *jdev_name; struct reiserfs_sb_info *sbi; int errval = -EINVAL; + char *qf_names[MAXQUOTAS] = {}; + unsigned int qfmt = 0; save_mount_options(s, data); @@ -1597,9 +1634,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) jdev_name = NULL; if (reiserfs_parse_options (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, - &commit_max_age) == 0) { + &commit_max_age, qf_names, &qfmt) == 0) { goto error; } +#ifdef CONFIG_QUOTA + handle_quota_files(s, qf_names, &qfmt); +#endif if (blocks) { SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " @@ -1819,7 +1859,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) return (0); - error: +error: if (jinit_done) { /* kill the commit thread, free journal ram */ journal_release_error(NULL, s); } @@ -1830,10 +1870,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) #ifdef CONFIG_QUOTA { int j; - for (j = 0; j < MAXQUOTAS; j++) { - kfree(sbi->s_qf_names[j]); - sbi->s_qf_names[j] = NULL; - } + for (j = 0; j < MAXQUOTAS; j++) + kfree(qf_names[j]); } #endif kfree(sbi); @@ -1980,7 +2018,7 @@ static int reiserfs_release_dquot(struct dquot *dquot) static int reiserfs_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ + /* Are we journaling quotas? */ if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); @@ -2026,6 +2064,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, int err; struct nameidata nd; struct inode *inode; + struct reiserfs_transaction_handle th; if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) return -EINVAL; @@ -2053,17 +2092,28 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, } mark_inode_dirty(inode); } - /* Not journalling quota? No more tests needed... */ - if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && - !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { - path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, 0); - } - /* Quotafile not of fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) - reiserfs_warning(sb, + /* Journaling quota? */ + if (REISERFS_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + reiserfs_warning(sb, "reiserfs: Quota file not on filesystem root. " "Journalled quota will not work."); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (reiserfs_file_data_log(inode)) { + /* Just start temporary transaction and finish it */ + err = journal_begin(&th, sb, 1); + if (err) + return err; + err = journal_end_sync(&th, sb, 1); + if (err) + return err; + } path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path, 0); } diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 5e90a95ad60b..056008db1377 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -6,8 +6,6 @@ #include <linux/reiserfs_xattr.h> #include <asm/uaccess.h> -#define XATTR_SECURITY_PREFIX "security." - static int security_get(struct inode *inode, const char *name, void *buffer, size_t size) { diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 024a938ca60f..60abe2bb1f98 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -7,8 +7,6 @@ #include <linux/reiserfs_xattr.h> #include <asm/uaccess.h> -#define XATTR_TRUSTED_PREFIX "trusted." - static int trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) { diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 073f39364b11..1384efcb938e 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -10,8 +10,6 @@ # include <linux/reiserfs_acl.h> #endif -#define XATTR_USER_PREFIX "user." - static int user_get(struct inode *inode, const char *name, void *buffer, size_t size) { diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index 8182f0542a21..8c177eb7e344 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -13,7 +13,6 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/dirent.h> #include <linux/smb_fs.h> #include <linux/pagemap.h> #include <linux/net.h> diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index d517a27b7f4b..ee536e8a649a 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -16,7 +16,6 @@ #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/dcache.h> -#include <linux/dirent.h> #include <linux/nls.h> #include <linux/smp_lock.h> #include <linux/net.h> diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 506f724055c2..227c9d700040 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -76,6 +76,7 @@ #include <linux/errno.h> #include <linux/fs.h> +#include <linux/quotaops.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/stat.h> diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index b546ba69be82..155c10b4adbd 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -621,7 +621,7 @@ shortname: memcpy(de->name, msdos_name, MSDOS_NAME); de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de->time = de->ctime = time; de->date = de->cdate = de->adate = date; de->ctime_cs = 0; diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h index fb3185196298..15fda4344424 100644 --- a/include/asm-alpha/thread_info.h +++ b/include/asm-alpha/thread_info.h @@ -50,10 +50,8 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define current_thread_info() __current_thread_info /* Thread information allocation. */ +#define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_thread_info(tsk) \ - ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* __ASSEMBLY__ */ diff --git a/include/asm-arm/ptrace.h b/include/asm-arm/ptrace.h index 7aaa206cb54e..8382b7510f94 100644 --- a/include/asm-arm/ptrace.h +++ b/include/asm-arm/ptrace.h @@ -139,8 +139,6 @@ static inline int valid_user_regs(struct pt_regs *regs) return 0; } -#endif /* __KERNEL__ */ - #define pc_pointer(v) \ ((v) & ~PCMASK) @@ -153,10 +151,10 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -#ifdef __KERNEL__ #define predicate(x) ((x) & 0xf0000000) #define PREDICATE_ALWAYS 0xe0000000 -#endif + +#endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h index f5a664786311..d4be2d646160 100644 --- a/include/asm-arm/thread_info.h +++ b/include/asm-arm/thread_info.h @@ -97,19 +97,6 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); } -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, \ - THREAD_SIZE_ORDER)) -#else -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER)) -#endif - -#define free_thread_info(info) \ - free_pages((unsigned long)info, THREAD_SIZE_ORDER); - #define thread_saved_pc(tsk) \ ((unsigned long)(pc_pointer(task_thread_info(tsk)->cpu_context.pc))) #define thread_saved_fp(tsk) \ diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h index df68631b7b27..294b25f9323d 100644 --- a/include/asm-avr32/thread_info.h +++ b/include/asm-avr32/thread_info.h @@ -61,10 +61,6 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)addr; } -/* thread information allocation */ -#define alloc_thread_info(ti) \ - ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER)) -#define free_thread_info(ti) free_pages((unsigned long)(ti), 1) #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/include/asm-blackfin/ptrace.h b/include/asm-blackfin/ptrace.h index b8346cd3a6f6..a45a80e54adc 100644 --- a/include/asm-blackfin/ptrace.h +++ b/include/asm-blackfin/ptrace.h @@ -83,14 +83,14 @@ struct pt_regs { #define PTRACE_GETREGS 12 #define PTRACE_SETREGS 13 /* ptrace signal */ -#ifdef CONFIG_BINFMT_ELF_FDPIC #define PTRACE_GETFDPIC 31 #define PTRACE_GETFDPIC_EXEC 0 #define PTRACE_GETFDPIC_INTERP 1 -#endif #define PS_S (0x0002) +#ifdef __KERNEL__ + /* user_mode returns true if only one bit is set in IPEND, other than the master interrupt enable. */ #define user_mode(regs) (!(((regs)->ipend & ~0x10) & (((regs)->ipend & ~0x10) - 1))) @@ -98,6 +98,8 @@ struct pt_regs { #define profile_pc(regs) instruction_pointer(regs) extern void show_regs(struct pt_regs *); +#endif /* __KERNEL__ */ + #endif /* __ASSEMBLY__ */ /* diff --git a/include/asm-blackfin/thread_info.h b/include/asm-blackfin/thread_info.h index bc2fe5accf20..642769329d12 100644 --- a/include/asm-blackfin/thread_info.h +++ b/include/asm-blackfin/thread_info.h @@ -42,6 +42,7 @@ /* * Size of kernel stack for each process. This must be a power of 2... */ +#define THREAD_SIZE_ORDER 1 #define THREAD_SIZE 8192 /* 2 pages */ #ifndef __ASSEMBLY__ @@ -94,10 +95,6 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)((long)ti & ~((long)THREAD_SIZE-1)); } -/* thread information allocation */ -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, 1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* __ASSEMBLY__ */ /* diff --git a/include/asm-cris/arch-v10/Kbuild b/include/asm-cris/arch-v10/Kbuild index 60e7e1b73cec..7a192e1290b1 100644 --- a/include/asm-cris/arch-v10/Kbuild +++ b/include/asm-cris/arch-v10/Kbuild @@ -1,4 +1,3 @@ -header-y += ptrace.h header-y += user.h header-y += svinto.h header-y += sv_addr_ag.h diff --git a/include/asm-cris/arch-v10/ptrace.h b/include/asm-cris/arch-v10/ptrace.h index fb14c5ee37f9..2f464eab3a51 100644 --- a/include/asm-cris/arch-v10/ptrace.h +++ b/include/asm-cris/arch-v10/ptrace.h @@ -106,10 +106,14 @@ struct switch_stack { unsigned long return_ip; /* ip that _resume will return to */ }; +#ifdef __KERNEL__ + /* bit 8 is user-mode flag */ #define user_mode(regs) (((regs)->dccr & 0x100) != 0) #define instruction_pointer(regs) ((regs)->irp) #define profile_pc(regs) instruction_pointer(regs) extern void show_regs(struct pt_regs *); +#endif /* __KERNEL__ */ + #endif diff --git a/include/asm-cris/arch-v32/Kbuild b/include/asm-cris/arch-v32/Kbuild index a0ec545e242e..35f2fc4f993e 100644 --- a/include/asm-cris/arch-v32/Kbuild +++ b/include/asm-cris/arch-v32/Kbuild @@ -1,3 +1,2 @@ -header-y += ptrace.h header-y += user.h header-y += cryptocop.h diff --git a/include/asm-cris/arch-v32/ptrace.h b/include/asm-cris/arch-v32/ptrace.h index 516cc7062d94..41f4e8662bc2 100644 --- a/include/asm-cris/arch-v32/ptrace.h +++ b/include/asm-cris/arch-v32/ptrace.h @@ -106,9 +106,13 @@ struct switch_stack { unsigned long return_ip; /* ip that _resume will return to */ }; +#ifdef __KERNEL__ + #define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0) #define instruction_pointer(regs) ((regs)->erp) extern void show_regs(struct pt_regs *); #define profile_pc(regs) instruction_pointer(regs) +#endif /* __KERNEL__ */ + #endif diff --git a/include/asm-cris/ptrace.h b/include/asm-cris/ptrace.h index 1ec69a7ea836..d910925e3174 100644 --- a/include/asm-cris/ptrace.h +++ b/include/asm-cris/ptrace.h @@ -4,11 +4,13 @@ #include <asm/arch/ptrace.h> #ifdef __KERNEL__ + /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ #define PTRACE_GETREGS 12 #define PTRACE_SETREGS 13 -#endif #define profile_pc(regs) instruction_pointer(regs) +#endif /* __KERNEL__ */ + #endif /* _CRIS_PTRACE_H */ diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h index 784668ab0fa2..7efe1000f99d 100644 --- a/include/asm-cris/thread_info.h +++ b/include/asm-cris/thread_info.h @@ -11,6 +11,8 @@ #ifdef __KERNEL__ +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + #ifndef __ASSEMBLY__ #include <asm/types.h> #include <asm/processor.h> diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild index bc3f12c5b7e0..0f8956def738 100644 --- a/include/asm-frv/Kbuild +++ b/include/asm-frv/Kbuild @@ -3,4 +3,3 @@ include include/asm-generic/Kbuild.asm header-y += registers.h unifdef-y += termios.h -unifdef-y += ptrace.h diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h index 348b8f1df17e..b7ac6bf2844c 100644 --- a/include/asm-frv/thread_info.h +++ b/include/asm-frv/thread_info.h @@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info asm("gr15"); #define current_thread_info() ({ __current_thread_info; }) +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE #define alloc_thread_info(tsk) \ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 2632328d8646..a3f738cffdb6 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -34,9 +34,14 @@ struct bug_entry { #ifndef __WARN #ifndef __ASSEMBLY__ extern void warn_on_slowpath(const char *file, const int line); +extern void warn_slowpath(const char *file, const int line, + const char *fmt, ...) __attribute__((format(printf, 3, 4))); #define WANT_WARN_ON_SLOWPATH #endif #define __WARN() warn_on_slowpath(__FILE__, __LINE__) +#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg) +#else +#define __WARN_printf(arg...) __WARN() #endif #ifndef WARN_ON @@ -48,6 +53,15 @@ extern void warn_on_slowpath(const char *file, const int line); }) #endif +#ifndef WARN +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf(format); \ + unlikely(__ret_warn_on); \ +}) +#endif + #else /* !CONFIG_BUG */ #ifndef HAVE_ARCH_BUG #define BUG() @@ -63,6 +77,14 @@ extern void warn_on_slowpath(const char *file, const int line); unlikely(__ret_warn_on); \ }) #endif + +#ifndef WARN +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + unlikely(__ret_warn_on); \ +}) +#endif + #endif #define WARN_ON_ONCE(condition) ({ \ @@ -75,6 +97,9 @@ extern void warn_on_slowpath(const char *file, const int line); unlikely(__ret_warn_once); \ }) +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON((condition) && __ratelimit(state)) + #ifdef CONFIG_SMP # define WARN_ON_SMP(x) WARN_ON(x) #else diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 6be061d09da9..a3034d20ebd5 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -3,7 +3,7 @@ #include <linux/types.h> -#ifdef CONFIG_HAVE_GPIO_LIB +#ifdef CONFIG_GPIOLIB #include <linux/compiler.h> @@ -32,6 +32,8 @@ struct module; /** * struct gpio_chip - abstract a GPIO controller * @label: for diagnostics + * @dev: optional device providing the GPIOs + * @owner: helps prevent removal of modules exporting active GPIOs * @direction_input: configures signal "offset" as input, or returns error * @get: returns value for signal "offset"; for output signals this * returns either the value actually sensed, or zero @@ -59,6 +61,7 @@ struct module; */ struct gpio_chip { char *label; + struct device *dev; struct module *owner; int (*direction_input)(struct gpio_chip *chip, @@ -74,6 +77,7 @@ struct gpio_chip { int base; u16 ngpio; unsigned can_sleep:1; + unsigned exported:1; }; extern const char *gpiochip_is_requested(struct gpio_chip *chip, @@ -108,7 +112,18 @@ extern void __gpio_set_value(unsigned gpio, int value); extern int __gpio_cansleep(unsigned gpio); -#else +#ifdef CONFIG_GPIO_SYSFS + +/* + * A sysfs interface can be exported by individual drivers if they want, + * but more typically is configured entirely from userspace. + */ +extern int gpio_export(unsigned gpio, bool direction_may_change); +extern void gpio_unexport(unsigned gpio); + +#endif /* CONFIG_GPIO_SYSFS */ + +#else /* !CONFIG_HAVE_GPIO_LIB */ static inline int gpio_is_valid(int number) { @@ -137,6 +152,20 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) gpio_set_value(gpio, value); } -#endif +#endif /* !CONFIG_HAVE_GPIO_LIB */ + +#ifndef CONFIG_GPIO_SYSFS + +/* sysfs support is only available with gpiolib, where it's optional */ + +static inline int gpio_export(unsigned gpio, bool direction_may_change) +{ + return -ENOSYS; +} + +static inline void gpio_unexport(unsigned gpio) +{ +} +#endif /* CONFIG_GPIO_SYSFS */ #endif /* _ASM_GENERIC_GPIO_H */ diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h index 260948905e4e..f9bc9ac29b36 100644 --- a/include/asm-generic/int-ll64.h +++ b/include/asm-generic/int-ll64.h @@ -26,7 +26,7 @@ typedef unsigned int __u32; #ifdef __GNUC__ __extension__ typedef __signed__ long long __s64; __extension__ typedef unsigned long long __u64; -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#else typedef __signed__ long long __s64; typedef unsigned long long __u64; #endif diff --git a/include/asm-h8300/elf.h b/include/asm-h8300/elf.h index 26bfc7e641da..a8b57d1f4128 100644 --- a/include/asm-h8300/elf.h +++ b/include/asm-h8300/elf.h @@ -26,10 +26,10 @@ typedef unsigned long elf_fpregset_t; #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_H8_300 #if defined(__H8300H__) -#define ELF_FLAGS 0x810000 +#define ELF_CORE_EFLAGS 0x810000 #endif #if defined(__H8300S__) -#define ELF_FLAGS 0x820000 +#define ELF_CORE_EFLAGS 0x820000 #endif #define ELF_PLAT_INIT(_r) _r->er1 = 0 diff --git a/include/asm-h8300/thread_info.h b/include/asm-h8300/thread_info.h index 27bb95e2944c..aafd4d322ec3 100644 --- a/include/asm-h8300/thread_info.h +++ b/include/asm-h8300/thread_info.h @@ -49,6 +49,7 @@ struct thread_info { /* * Size of kernel stack for each process. This must be a power of 2... */ +#define THREAD_SIZE_ORDER 1 #define THREAD_SIZE 8192 /* 2 pages */ @@ -65,10 +66,6 @@ static inline struct thread_info *current_thread_info(void) return ti; } -/* thread information allocation */ -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, 1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* __ASSEMBLY__ */ /* diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 2422ac61658a..7c60fcdd2efd 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -54,6 +54,8 @@ struct thread_info { }, \ } +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) diff --git a/include/asm-m32r/thread_info.h b/include/asm-m32r/thread_info.h index 1effcd0f5e63..8589d462df27 100644 --- a/include/asm-m32r/thread_info.h +++ b/include/asm-m32r/thread_info.h @@ -94,6 +94,8 @@ static inline struct thread_info *current_thread_info(void) return ti; } +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE #define alloc_thread_info(tsk) \ diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h index d635a3752488..abc002798a2b 100644 --- a/include/asm-m68k/thread_info.h +++ b/include/asm-m68k/thread_info.h @@ -25,13 +25,7 @@ struct thread_info { } /* THREAD_SIZE should be 8k, so handle differently for 4k and 8k machines */ -#if PAGE_SHIFT == 13 /* 8k machines */ -#define alloc_thread_info(tsk) ((struct thread_info *)__get_free_pages(GFP_KERNEL,0)) -#define free_thread_info(ti) free_pages((unsigned long)(ti),0) -#else /* otherwise assume 4k pages */ -#define alloc_thread_info(tsk) ((struct thread_info *)__get_free_pages(GFP_KERNEL,1)) -#define free_thread_info(ti) free_pages((unsigned long)(ti),1) -#endif /* PAGE_SHIFT == 13 */ +#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT) #define init_thread_info (init_task.thread.info) #define init_stack (init_thread_union.stack) diff --git a/include/asm-m68knommu/ptrace.h b/include/asm-m68knommu/ptrace.h index 47258e86e8c4..8c9194b98548 100644 --- a/include/asm-m68knommu/ptrace.h +++ b/include/asm-m68knommu/ptrace.h @@ -68,10 +68,8 @@ struct switch_stack { /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ #define PTRACE_GETREGS 12 #define PTRACE_SETREGS 13 -#ifdef CONFIG_FPU #define PTRACE_GETFPREGS 14 #define PTRACE_SETFPREGS 15 -#endif #ifdef __KERNEL__ diff --git a/include/asm-m68knommu/thread_info.h b/include/asm-m68knommu/thread_info.h index 95996d978bed..0c9bc095f3f0 100644 --- a/include/asm-m68knommu/thread_info.h +++ b/include/asm-m68knommu/thread_info.h @@ -71,10 +71,6 @@ static inline struct thread_info *current_thread_info(void) return ti; } -/* thread information allocation */ -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_SIZE_ORDER) #endif /* __ASSEMBLY__ */ #define PREEMPT_ACTIVE 0x4000000 diff --git a/include/asm-mips/mach-generic/gpio.h b/include/asm-mips/mach-generic/gpio.h index e6b376bd9d06..b4e70208da64 100644 --- a/include/asm-mips/mach-generic/gpio.h +++ b/include/asm-mips/mach-generic/gpio.h @@ -1,7 +1,7 @@ #ifndef __ASM_MACH_GENERIC_GPIO_H #define __ASM_MACH_GENERIC_GPIO_H -#ifdef CONFIG_HAVE_GPIO_LIB +#ifdef CONFIG_GPIOLIB #define gpio_get_value __gpio_get_value #define gpio_set_value __gpio_set_value #define gpio_cansleep __gpio_cansleep diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h index b2772df1a1bd..bb3060699df2 100644 --- a/include/asm-mips/thread_info.h +++ b/include/asm-mips/thread_info.h @@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_MASK (THREAD_SIZE - 1UL) +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + #ifdef CONFIG_DEBUG_STACK_USAGE #define alloc_thread_info(tsk) \ ({ \ diff --git a/include/asm-mn10300/ptrace.h b/include/asm-mn10300/ptrace.h index b3684689fcce..7b06cc623d8b 100644 --- a/include/asm-mn10300/ptrace.h +++ b/include/asm-mn10300/ptrace.h @@ -88,12 +88,16 @@ extern struct pt_regs *__frame; /* current frame pointer */ /* options set using PTRACE_SETOPTIONS */ #define PTRACE_O_TRACESYSGOOD 0x00000001 -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if defined(__KERNEL__) + +#if !defined(__ASSEMBLY__) #define user_mode(regs) (((regs)->epsw & EPSW_nSL) == EPSW_nSL) #define instruction_pointer(regs) ((regs)->pc) extern void show_regs(struct pt_regs *); -#endif +#endif /* !__ASSEMBLY */ #define profile_pc(regs) ((regs)->pc) +#endif /* __KERNEL__ */ + #endif /* _ASM_PTRACE_H */ diff --git a/include/asm-mn10300/thread_info.h b/include/asm-mn10300/thread_info.h index e397e7192785..78a3881f3c12 100644 --- a/include/asm-mn10300/thread_info.h +++ b/include/asm-mn10300/thread_info.h @@ -112,6 +112,8 @@ static inline unsigned long current_stack_pointer(void) return sp; } +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE #define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL) diff --git a/include/asm-parisc/ptrace.h b/include/asm-parisc/ptrace.h index 93f990e418f1..3e94c5d85ff5 100644 --- a/include/asm-parisc/ptrace.h +++ b/include/asm-parisc/ptrace.h @@ -33,7 +33,6 @@ struct pt_regs { unsigned long ipsw; /* CR22 */ }; -#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS)) /* * The numbers chosen here are somewhat arbitrary but absolutely MUST * not overlap with any of the number assigned in <linux/ptrace.h>. @@ -43,8 +42,11 @@ struct pt_regs { * since we have taken branch traps too) */ #define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ + #ifdef __KERNEL__ +#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS)) + /* XXX should we use iaoq[1] or iaoq[0] ? */ #define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0) #define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0) diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h index 2d9c7500867b..9f812741c355 100644 --- a/include/asm-parisc/thread_info.h +++ b/include/asm-parisc/thread_info.h @@ -34,15 +34,11 @@ struct thread_info { /* thread information allocation */ -#define THREAD_ORDER 2 +#define THREAD_SIZE_ORDER 2 /* Be sure to hunt all references to this down when you change the size of * the kernel stack */ -#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) -#define THREAD_SHIFT (PAGE_SHIFT + THREAD_ORDER) - -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, THREAD_ORDER)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER) /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *)mfctl(30)) diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild index 04ce8f8a2ee7..5ab7d7fe198c 100644 --- a/include/asm-powerpc/Kbuild +++ b/include/asm-powerpc/Kbuild @@ -29,7 +29,6 @@ unifdef-y += elf.h unifdef-y += nvram.h unifdef-y += param.h unifdef-y += posix_types.h -unifdef-y += ptrace.h unifdef-y += seccomp.h unifdef-y += signal.h unifdef-y += spu_info.h diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 2a3e9075a5a0..ef8a248dfd55 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -127,6 +127,8 @@ extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr); extern void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end); +extern const char *powerpc_base_platform; + #endif /* __ASSEMBLY__ */ /* CPU kernel features */ diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h index 89664675b469..80d1f399ee51 100644 --- a/include/asm-powerpc/elf.h +++ b/include/asm-powerpc/elf.h @@ -217,6 +217,14 @@ typedef elf_vrregset_t elf_fpxregset_t; #define ELF_PLATFORM (cur_cpu_spec->platform) +/* While ELF_PLATFORM indicates the ISA supported by the platform, it + * may not accurately reflect the underlying behavior of the hardware + * (as in the case of running in Power5+ compatibility mode on a + * Power6 machine). ELF_BASE_PLATFORM allows ld.so to load libraries + * that are tuned for the real hardware. + */ +#define ELF_BASE_PLATFORM (powerpc_base_platform) + #ifdef __powerpc64__ # define ELF_PLAT_INIT(_r, load_addr) do { \ _r->gpr[2] = load_addr; \ diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h index ef328995ba9d..3a179827528d 100644 --- a/include/asm-powerpc/firmware.h +++ b/include/asm-powerpc/firmware.h @@ -46,6 +46,7 @@ #define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000) #define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000) #define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000) +#define FW_FEATURE_CMO ASM_CONST(0x0000000004000000) #ifndef __ASSEMBLY__ @@ -58,7 +59,7 @@ enum { FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | - FW_FEATURE_SPLPAR | FW_FEATURE_LPAR, + FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, diff --git a/include/asm-powerpc/gpio.h b/include/asm-powerpc/gpio.h index 77ad3a890f30..ea04632399d8 100644 --- a/include/asm-powerpc/gpio.h +++ b/include/asm-powerpc/gpio.h @@ -17,7 +17,7 @@ #include <linux/errno.h> #include <asm-generic/gpio.h> -#ifdef CONFIG_HAVE_GPIO_LIB +#ifdef CONFIG_GPIOLIB /* * We don't (yet) implement inlined/rapid versions for on-chip gpios. @@ -51,6 +51,6 @@ static inline int irq_to_gpio(unsigned int irq) return -EINVAL; } -#endif /* CONFIG_HAVE_GPIO_LIB */ +#endif /* CONFIG_GPIOLIB */ #endif /* __ASM_POWERPC_GPIO_H */ diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h index bf6cd7cb996c..fbe2932fa9e9 100644 --- a/include/asm-powerpc/hvcall.h +++ b/include/asm-powerpc/hvcall.h @@ -92,6 +92,11 @@ #define H_EXACT (1UL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */ #define H_R_XLATE (1UL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */ #define H_READ_4 (1UL<<(63-26)) /* Return 4 PTEs */ +#define H_PAGE_STATE_CHANGE (1UL<<(63-28)) +#define H_PAGE_UNUSED ((1UL<<(63-29)) | (1UL<<(63-30))) +#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED) +#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1UL<<(63-31))) +#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE #define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */ #define H_ANDCOND (1UL<<(63-33)) #define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ @@ -210,7 +215,9 @@ #define H_JOIN 0x298 #define H_VASI_STATE 0x2A4 #define H_ENABLE_CRQ 0x2B0 -#define MAX_HCALL_OPCODE H_ENABLE_CRQ +#define H_SET_MPP 0x2D0 +#define H_GET_MPP 0x2D4 +#define MAX_HCALL_OPCODE H_GET_MPP #ifndef __ASSEMBLY__ @@ -270,6 +277,20 @@ struct hcall_stats { }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) +struct hvcall_mpp_data { + unsigned long entitled_mem; + unsigned long mapped_mem; + unsigned short group_num; + unsigned short pool_num; + unsigned char mem_weight; + unsigned char unallocated_mem_weight; + unsigned long unallocated_entitlement; /* value in bytes */ + unsigned long pool_size; + signed long loan_request; + unsigned long backing_mem; +}; + +int h_get_mpp(struct hvcall_mpp_data *); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h index 567ed92cd91f..2fe268b10333 100644 --- a/include/asm-powerpc/lppaca.h +++ b/include/asm-powerpc/lppaca.h @@ -125,7 +125,10 @@ struct lppaca { // NOTE: This value will ALWAYS be zero for dedicated processors and // will NEVER be zero for shared processors (ie, initialized to a 1). volatile u32 yield_count; // PLIC increments each dispatchx00-x03 - u8 reserved6[124]; // Reserved x04-x7F + u32 reserved6; + volatile u64 cmo_faults; // CMO page fault count x08-x0F + volatile u64 cmo_fault_time; // CMO page fault time x10-x17 + u8 reserved7[104]; // Reserved x18-x7F //============================================================================= // CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index 1233d735fd28..893aafd87fde 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -76,7 +76,7 @@ struct machdep_calls { * destroyed as well */ void (*hpte_clear_all)(void); - void (*tce_build)(struct iommu_table * tbl, + int (*tce_build)(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, diff --git a/include/asm-powerpc/mpc52xx_psc.h b/include/asm-powerpc/mpc52xx_psc.h index 710c5d36efaa..8917ed630565 100644 --- a/include/asm-powerpc/mpc52xx_psc.h +++ b/include/asm-powerpc/mpc52xx_psc.h @@ -60,10 +60,12 @@ #define MPC52xx_PSC_RXTX_FIFO_ALARM 0x0002 #define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001 -/* PSC interrupt mask bits */ +/* PSC interrupt status/mask bits */ #define MPC52xx_PSC_IMR_TXRDY 0x0100 #define MPC52xx_PSC_IMR_RXRDY 0x0200 #define MPC52xx_PSC_IMR_DB 0x0400 +#define MPC52xx_PSC_IMR_TXEMP 0x0800 +#define MPC52xx_PSC_IMR_ORERR 0x1000 #define MPC52xx_PSC_IMR_IPC 0x8000 /* PSC input port change bit */ @@ -92,6 +94,34 @@ #define MPC52xx_PSC_RFNUM_MASK 0x01ff +#define MPC52xx_PSC_SICR_DTS1 (1 << 29) +#define MPC52xx_PSC_SICR_SHDR (1 << 28) +#define MPC52xx_PSC_SICR_SIM_MASK (0xf << 24) +#define MPC52xx_PSC_SICR_SIM_UART (0x0 << 24) +#define MPC52xx_PSC_SICR_SIM_UART_DCD (0x8 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_8 (0x1 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_16 (0x2 << 24) +#define MPC52xx_PSC_SICR_SIM_AC97 (0x3 << 24) +#define MPC52xx_PSC_SICR_SIM_SIR (0x8 << 24) +#define MPC52xx_PSC_SICR_SIM_SIR_DCD (0xc << 24) +#define MPC52xx_PSC_SICR_SIM_MIR (0x5 << 24) +#define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24) +#define MPC52xx_PSC_SICR_GENCLK (1 << 23) +#define MPC52xx_PSC_SICR_I2S (1 << 22) +#define MPC52xx_PSC_SICR_CLKPOL (1 << 21) +#define MPC52xx_PSC_SICR_SYNCPOL (1 << 20) +#define MPC52xx_PSC_SICR_CELLSLAVE (1 << 19) +#define MPC52xx_PSC_SICR_CELL2XCLK (1 << 18) +#define MPC52xx_PSC_SICR_ESAI (1 << 17) +#define MPC52xx_PSC_SICR_ENAC97 (1 << 16) +#define MPC52xx_PSC_SICR_SPI (1 << 15) +#define MPC52xx_PSC_SICR_MSTR (1 << 14) +#define MPC52xx_PSC_SICR_CPOL (1 << 13) +#define MPC52xx_PSC_SICR_CPHA (1 << 12) +#define MPC52xx_PSC_SICR_USEEOF (1 << 11) +#define MPC52xx_PSC_SICR_DISABLEEOF (1 << 10) /* Structure of the hardware registers */ struct mpc52xx_psc { @@ -132,8 +162,12 @@ struct mpc52xx_psc { u8 reserved5[3]; u8 ctlr; /* PSC + 0x1c */ u8 reserved6[3]; - u16 ccr; /* PSC + 0x20 */ - u8 reserved7[14]; + /* BitClkDiv field of CCR is byte swapped in + * the hardware for mpc5200/b compatibility */ + u32 ccr; /* PSC + 0x20 */ + u32 ac97_slots; /* PSC + 0x24 */ + u32 ac97_cmd; /* PSC + 0x28 */ + u32 ac97_data; /* PSC + 0x2c */ u8 ivr; /* PSC + 0x30 */ u8 reserved8[3]; u8 ip; /* PSC + 0x34 */ diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index d18ffe7bc7c4..dbb8ca172e44 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h @@ -38,6 +38,19 @@ extern void paging_init(void); remap_pfn_range(vma, vaddr, pfn, size, prot) #include <asm-generic/pgtable.h> + + +/* + * This gets called at the end of handling a page fault, when + * the kernel has put a new PTE into the page table for the process. + * We use it to ensure coherency between the i-cache and d-cache + * for the page which has just been mapped in. + * On machines which use an MMU hash table, we use this to put a + * corresponding HPTE into the hash table ahead of time, instead of + * waiting for the inevitable extra hash-table miss exception. + */ +extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h index 2b8a458f990a..eb8eb400c664 100644 --- a/include/asm-powerpc/syscalls.h +++ b/include/asm-powerpc/syscalls.h @@ -31,6 +31,7 @@ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4, unsigned long p5, unsigned long p6, struct pt_regs *regs); asmlinkage long sys_pipe(int __user *fildes); +asmlinkage long sys_pipe2(int __user *fildes, int flags); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, struct sigaction __user *oact, size_t sigsetsize); diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h index ae7085c65692..e084272ed1c2 100644 --- a/include/asm-powerpc/systbl.h +++ b/include/asm-powerpc/systbl.h @@ -316,3 +316,9 @@ COMPAT_SYS(fallocate) SYSCALL(subpage_prot) COMPAT_SYS_SPU(timerfd_settime) COMPAT_SYS_SPU(timerfd_gettime) +COMPAT_SYS_SPU(signalfd4) +SYSCALL_SPU(eventfd2) +SYSCALL_SPU(epoll_create1) +SYSCALL_SPU(dup3) +SYSCALL_SPU(pipe2) +SYSCALL(inotify_init1) diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index e6e25e2364eb..d6648c143322 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -110,6 +110,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif extern int set_dabr(unsigned long dabr); +extern void do_dabr(struct pt_regs *regs, unsigned long address, + unsigned long error_code); extern void print_backtrace(unsigned long *); extern void show_regs(struct pt_regs * regs); extern void flush_instruction_cache(void); diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index b705c2a7651a..a9db562df69a 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -66,20 +66,12 @@ struct thread_info { #if THREAD_SHIFT >= PAGE_SHIFT -#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT) - -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(GFP_KERNEL | \ - __GFP_ZERO, THREAD_ORDER)) -#else -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER)) -#endif -#define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER) +#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) #else /* THREAD_SHIFT < PAGE_SHIFT */ +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + extern struct thread_info *alloc_thread_info(struct task_struct *tsk); extern void free_thread_info(struct thread_info *ti); diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h index 5c9108147644..361cd5c7a32b 100644 --- a/include/asm-powerpc/tlbflush.h +++ b/include/asm-powerpc/tlbflush.h @@ -162,16 +162,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, #endif -/* - * This gets called at the end of handling a page fault, when - * the kernel has put a new PTE into the page table for the process. - * We use it to ensure coherency between the i-cache and d-cache - * for the page which has just been mapped in. - * On machines which use an MMU hash table, we use this to put a - * corresponding HPTE into the hash table ahead of time, instead of - * waiting for the inevitable extra hash-table miss exception. - */ -extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); - #endif /*__KERNEL__ */ #endif /* _ASM_POWERPC_TLBFLUSH_H */ diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index ce91bb662063..e07d0c76ed77 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -335,10 +335,16 @@ #define __NR_subpage_prot 310 #define __NR_timerfd_settime 311 #define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 #ifdef __KERNEL__ -#define __NR_syscalls 313 +#define __NR_syscalls 319 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h index 56512a968dab..0a290a195946 100644 --- a/include/asm-powerpc/vio.h +++ b/include/asm-powerpc/vio.h @@ -39,16 +39,32 @@ #define VIO_IRQ_DISABLE 0UL #define VIO_IRQ_ENABLE 1UL +/* + * VIO CMO minimum entitlement for all devices and spare entitlement + */ +#define VIO_CMO_MIN_ENT 1562624 + struct iommu_table; -/* - * The vio_dev structure is used to describe virtual I/O devices. +/** + * vio_dev - This structure is used to describe virtual I/O devices. + * + * @desired: set from return of driver's get_desired_dma() function + * @entitled: bytes of IO data that has been reserved for this device. + * @allocated: bytes of IO data currently in use by the device. + * @allocs_failed: number of DMA failures due to insufficient entitlement. */ struct vio_dev { const char *name; const char *type; uint32_t unit_address; unsigned int irq; + struct { + size_t desired; + size_t entitled; + size_t allocated; + atomic_t allocs_failed; + } cmo; struct device dev; }; @@ -56,12 +72,19 @@ struct vio_driver { const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); int (*remove)(struct vio_dev *dev); + /* A driver must have a get_desired_dma() function to + * be loaded in a CMO environment if it uses DMA. + */ + unsigned long (*get_desired_dma)(struct vio_dev *dev); struct device_driver driver; }; extern int vio_register_driver(struct vio_driver *drv); extern void vio_unregister_driver(struct vio_driver *drv); +extern int vio_cmo_entitlement_update(size_t); +extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired); + extern void __devinit vio_unregister_device(struct vio_dev *dev); struct device_node; diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h index 5c871a990c29..146100224def 100644 --- a/include/asm-s390/kvm_virtio.h +++ b/include/asm-s390/kvm_virtio.h @@ -50,4 +50,14 @@ struct kvm_vqconfig { #define KVM_S390_VIRTIO_RESET 1 #define KVM_S390_VIRTIO_SET_STATUS 2 +#ifdef __KERNEL__ +/* early virtio console setup */ +#ifdef CONFIG_VIRTIO_CONSOLE +extern void s390_virtio_console_init(void); +#else +static inline void s390_virtio_console_init(void) +{ +} +#endif /* CONFIG_VIRTIO_CONSOLE */ +#endif /* __KERNEL__ */ #endif diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h index 99bbed99a3b2..91a8f93ad355 100644 --- a/include/asm-s390/thread_info.h +++ b/include/asm-s390/thread_info.h @@ -78,10 +78,7 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)((*(unsigned long *) __LC_KERNEL_STACK)-THREAD_SIZE); } -/* thread information allocation */ -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL,THREAD_ORDER)) -#define free_thread_info(ti) free_pages((unsigned long) (ti),THREAD_ORDER) +#define THREAD_SIZE_ORDER THREAD_ORDER #endif diff --git a/include/asm-sh/ptrace.h b/include/asm-sh/ptrace.h index 8d6c92b3e770..7d36dc3bee69 100644 --- a/include/asm-sh/ptrace.h +++ b/include/asm-sh/ptrace.h @@ -5,7 +5,7 @@ * Copyright (C) 1999, 2000 Niibe Yutaka * */ -#if defined(__SH5__) || defined(CONFIG_SUPERH64) +#if defined(__SH5__) struct pt_regs { unsigned long long pc; unsigned long long sr; diff --git a/include/asm-sh/thread_info.h b/include/asm-sh/thread_info.h index c50e5d35fe84..5131e3907525 100644 --- a/include/asm-sh/thread_info.h +++ b/include/asm-sh/thread_info.h @@ -92,6 +92,8 @@ static inline struct thread_info *current_thread_info(void) return ti; } +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE #define alloc_thread_info(ti) kzalloc(THREAD_SIZE, GFP_KERNEL) diff --git a/include/asm-sparc/thread_info_32.h b/include/asm-sparc/thread_info_32.h index 91b9f5888c85..2cf9db044055 100644 --- a/include/asm-sparc/thread_info_32.h +++ b/include/asm-sparc/thread_info_32.h @@ -86,6 +86,8 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define THREAD_INFO_ORDER 1 #endif +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void) #define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)() diff --git a/include/asm-sparc/thread_info_64.h b/include/asm-sparc/thread_info_64.h index c6d2e6c7f844..960969d5ad06 100644 --- a/include/asm-sparc/thread_info_64.h +++ b/include/asm-sparc/thread_info_64.h @@ -155,6 +155,8 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define __THREAD_INFO_ORDER 0 #endif /* PAGE_SHIFT == 13 */ +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + #ifdef CONFIG_DEBUG_STACK_USAGE #define alloc_thread_info(tsk) \ ({ \ diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h index 356b83e2c22e..e07e72846c7a 100644 --- a/include/asm-um/thread_info.h +++ b/include/asm-um/thread_info.h @@ -53,21 +53,7 @@ static inline struct thread_info *current_thread_info(void) return ti; } -#ifdef CONFIG_DEBUG_STACK_USAGE - -#define alloc_thread_info(tsk) \ - ((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \ - CONFIG_KERNEL_STACK_ORDER)) -#else - -/* thread information allocation */ -#define alloc_thread_info(tsk) \ - ((struct thread_info *) __get_free_pages(GFP_KERNEL, \ - CONFIG_KERNEL_STACK_ORDER)) -#endif - -#define free_thread_info(ti) \ - free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER) +#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER #endif diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild index 811e9828ccb3..4a8e80cdcfa5 100644 --- a/include/asm-x86/Kbuild +++ b/include/asm-x86/Kbuild @@ -18,7 +18,6 @@ unifdef-y += msr.h unifdef-y += mtrr.h unifdef-y += posix_types_32.h unifdef-y += posix_types_64.h -unifdef-y += ptrace.h unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += vm86.h diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h index ff87fca0caf9..116e9147fe66 100644 --- a/include/asm-x86/gpio.h +++ b/include/asm-x86/gpio.h @@ -1,6 +1,62 @@ +/* + * Generic GPIO API implementation for x86. + * + * Derived from the generic GPIO API for powerpc: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov <avorontsov@ru.mvista.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + #ifndef _ASM_I386_GPIO_H #define _ASM_I386_GPIO_H +#ifdef CONFIG_X86_RDC321X #include <gpio.h> +#else /* CONFIG_X86_RDC321X */ + +#include <asm-generic/gpio.h> + +#ifdef CONFIG_GPIOLIB + +/* + * Just call gpiolib. + */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +/* + * Not implemented, yet. + */ +static inline int gpio_to_irq(unsigned int gpio) +{ + return -ENOSYS; +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* CONFIG_X86_RDC321X */ #endif /* _ASM_I386_GPIO_H */ diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 3f2de1050988..da0a675adf94 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -152,6 +152,8 @@ struct thread_info { #define THREAD_FLAGS GFP_KERNEL #endif +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + #define alloc_thread_info(tsk) \ ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) diff --git a/include/asm-xtensa/ptrace.h b/include/asm-xtensa/ptrace.h index 422c73e26937..089b0db44816 100644 --- a/include/asm-xtensa/ptrace.h +++ b/include/asm-xtensa/ptrace.h @@ -73,10 +73,10 @@ #define PTRACE_GETXTREGS 18 #define PTRACE_SETXTREGS 19 -#ifndef __ASSEMBLY__ - #ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + /* * This struct defines the way the registers are stored on the * kernel stack during a system call or other kernel entry. @@ -122,14 +122,14 @@ extern void show_regs(struct pt_regs *); # ifndef CONFIG_SMP # define profile_pc(regs) instruction_pointer(regs) # endif -#endif /* __KERNEL__ */ #else /* __ASSEMBLY__ */ -#ifdef __KERNEL__ # include <asm/asm-offsets.h> #define PT_REGS_OFFSET (KERNEL_STACK_SIZE - PT_USER_SIZE) -#endif #endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + #endif /* _XTENSA_PTRACE_H */ diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h index a2c640682ed9..7e4131dd546c 100644 --- a/include/asm-xtensa/thread_info.h +++ b/include/asm-xtensa/thread_info.h @@ -111,10 +111,6 @@ static inline struct thread_info *current_thread_info(void) return ti; } -/* thread information allocation */ -#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) - #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ @@ -160,6 +156,7 @@ static inline struct thread_info *current_thread_info(void) #define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ #define THREAD_SIZE 8192 //(2*PAGE_SIZE) +#define THREAD_SIZE_ORDER 1 #endif /* __KERNEL__ */ #endif /* _XTENSA_THREAD_INFO */ diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 402c8f55d713..4c4142c5aa6e 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -189,7 +189,6 @@ unifdef-y += connector.h unifdef-y += cuda.h unifdef-y += cyclades.h unifdef-y += dccp.h -unifdef-y += dirent.h unifdef-y += dlm.h unifdef-y += dlm_plock.h unifdef-y += edd.h diff --git a/include/linux/acct.h b/include/linux/acct.h index e8cae54e8d88..882dc7248766 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -120,17 +120,20 @@ struct acct_v3 struct vfsmount; struct super_block; struct pacct_struct; +struct pid_namespace; extern void acct_auto_close_mnt(struct vfsmount *m); extern void acct_auto_close(struct super_block *sb); extern void acct_init_pacct(struct pacct_struct *pacct); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); +extern void acct_exit_ns(struct pid_namespace *); #else #define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) #define acct_init_pacct(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) +#define acct_exit_ns(ns) do { } while (0) #endif /* diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index 0da17d14fd13..d7afa9dd6635 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -26,9 +26,13 @@ #define AT_SECURE 23 /* secure mode boolean */ +#define AT_BASE_PLATFORM 24 /* string identifying real platform, may + * differ from AT_PLATFORM. */ + #define AT_EXECFN 31 /* filename of program */ + #ifdef __KERNEL__ -#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 4ddf2922fc8d..652470b687c9 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -103,17 +103,16 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_low(x, PAGE_SIZE, 0) -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ - -extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, - int flags); - #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ + +extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, + int flags); extern void *alloc_bootmem_section(unsigned long size, unsigned long section_nr); diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 961ed4b48d8e..44f95b92393b 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -94,12 +94,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p) #define __le32_to_cpus(x) __swab32s((x)) #define __cpu_to_le16s(x) __swab16s((x)) #define __le16_to_cpus(x) __swab16s((x)) -#define __cpu_to_be64s(x) do {} while (0) -#define __be64_to_cpus(x) do {} while (0) -#define __cpu_to_be32s(x) do {} while (0) -#define __be32_to_cpus(x) do {} while (0) -#define __cpu_to_be16s(x) do {} while (0) -#define __be16_to_cpus(x) do {} while (0) +#define __cpu_to_be64s(x) do { (void)(x); } while (0) +#define __be64_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_be32s(x) do { (void)(x); } while (0) +#define __be32_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_be16s(x) do { (void)(x); } while (0) +#define __be16_to_cpus(x) do { (void)(x); } while (0) #ifdef __KERNEL__ #include <linux/byteorder/generic.h> diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 05dc7c35b3b2..4cc170a31762 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -88,12 +88,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p) { return __swab16p((__u16 *)p); } -#define __cpu_to_le64s(x) do {} while (0) -#define __le64_to_cpus(x) do {} while (0) -#define __cpu_to_le32s(x) do {} while (0) -#define __le32_to_cpus(x) do {} while (0) -#define __cpu_to_le16s(x) do {} while (0) -#define __le16_to_cpus(x) do {} while (0) +#define __cpu_to_le64s(x) do { (void)(x); } while (0) +#define __le64_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_le32s(x) do { (void)(x); } while (0) +#define __le32_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_le16s(x) do { (void)(x); } while (0) +#define __le16_to_cpus(x) do { (void)(x); } while (0) #define __cpu_to_be64s(x) __swab64s((x)) #define __be64_to_cpus(x) __swab64s((x)) #define __cpu_to_be32s(x) __swab32s((x)) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e155aa78d859..c98dd7cb7076 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -21,11 +21,13 @@ struct cgroupfs_root; struct cgroup_subsys; struct inode; +struct cgroup; extern int cgroup_init_early(void); extern int cgroup_init(void); extern void cgroup_init_smp(void); extern void cgroup_lock(void); +extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); extern void cgroup_fork_callbacks(struct task_struct *p); @@ -205,50 +207,64 @@ struct cftype { * subsystem, followed by a period */ char name[MAX_CFTYPE_NAME]; int private; - int (*open) (struct inode *inode, struct file *file); - ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, loff_t *ppos); + + /* + * If non-zero, defines the maximum length of string that can + * be passed to write_string; defaults to 64 + */ + size_t max_write_len; + + int (*open)(struct inode *inode, struct file *file); + ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, loff_t *ppos); /* * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() */ - u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft); + u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft); /* * read_s64() is a signed version of read_u64() */ - s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft); + s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft); /* * read_map() is used for defining a map of key/value * pairs. It should call cb->fill(cb, key, value) for each * entry. The key/value pairs (and their ordering) should not * change between reboots. */ - int (*read_map) (struct cgroup *cont, struct cftype *cft, - struct cgroup_map_cb *cb); + int (*read_map)(struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb); /* * read_seq_string() is used for outputting a simple sequence * using seqfile. */ - int (*read_seq_string) (struct cgroup *cont, struct cftype *cft, - struct seq_file *m); + int (*read_seq_string)(struct cgroup *cont, struct cftype *cft, + struct seq_file *m); - ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft, - struct file *file, - const char __user *buf, size_t nbytes, loff_t *ppos); + ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *buf, size_t nbytes, loff_t *ppos); /* * write_u64() is a shortcut for the common case of accepting * a single integer (as parsed by simple_strtoull) from * userspace. Use in place of write(); return 0 or error. */ - int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val); + int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val); /* * write_s64() is a signed version of write_u64() */ - int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val); + int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val); /* + * write_string() is passed a nul-terminated kernelspace + * buffer of maximum length determined by max_write_len. + * Returns 0 or -ve error code. + */ + int (*write_string)(struct cgroup *cgrp, struct cftype *cft, + const char *buffer); + /* * trigger() callback can be used to get some kick from the * userspace, when the actual string written is not important * at all. The private field can be used to determine the @@ -256,7 +272,7 @@ struct cftype { */ int (*trigger)(struct cgroup *cgrp, unsigned int event); - int (*release) (struct inode *inode, struct file *file); + int (*release)(struct inode *inode, struct file *file); }; struct cgroup_scanner { @@ -348,7 +364,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, return task_subsys_state(task, subsys_id)->cgroup; } -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); +int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss, + char *nodename); /* A cgroup_iter should be treated as an opaque object */ struct cgroup_iter { diff --git a/include/linux/coda.h b/include/linux/coda.h index b5cf0780c51a..96c87693800b 100644 --- a/include/linux/coda.h +++ b/include/linux/coda.h @@ -199,28 +199,6 @@ typedef u_int32_t vuid_t; typedef u_int32_t vgid_t; #endif /*_VUID_T_ */ -#ifdef CONFIG_CODA_FS_OLD_API -struct CodaFid { - u_int32_t opaque[3]; -}; - -static __inline__ ino_t coda_f2i(struct CodaFid *fid) -{ - if ( ! fid ) - return 0; - if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff) - return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff)); - else - return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20)); -} - -struct coda_cred { - vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/ - vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */ -}; - -#else /* not defined(CONFIG_CODA_FS_OLD_API) */ - struct CodaFid { u_int32_t opaque[4]; }; @@ -228,8 +206,6 @@ struct CodaFid { #define coda_f2i(fid)\ (fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0) -#endif - #ifndef _VENUS_VATTR_T_ #define _VENUS_VATTR_T_ /* @@ -313,15 +289,7 @@ struct coda_statfs { #define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t) -#if 0 -#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */ -#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */ -#endif -#ifdef CONFIG_CODA_FS_OLD_API -#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */ -#else #define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */ -#endif /* * Venus <-> Coda RPC arguments @@ -329,16 +297,9 @@ struct coda_statfs { struct coda_in_hdr { u_int32_t opcode; u_int32_t unique; /* Keep multiple outstanding msgs distinct */ -#ifdef CONFIG_CODA_FS_OLD_API - u_int16_t pid; /* Common to all */ - u_int16_t pgid; /* Common to all */ - u_int16_t sid; /* Common to all */ - struct coda_cred cred; /* Common to all */ -#else pid_t pid; pid_t pgid; vuid_t uid; -#endif }; /* Really important that opcode and unique are 1st two fields! */ @@ -613,11 +574,7 @@ struct coda_vget_out { /* CODA_PURGEUSER is a venus->kernel call */ struct coda_purgeuser_out { struct coda_out_hdr oh; -#ifdef CONFIG_CODA_FS_OLD_API - struct coda_cred cred; -#else vuid_t uid; -#endif }; /* coda_zapfile: */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7464ba3b4333..d7faf8808497 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,10 +69,11 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) #endif int cpu_up(unsigned int cpu); - extern void cpu_hotplug_init(void); +extern void cpu_maps_update_begin(void); +extern void cpu_maps_update_done(void); -#else +#else /* CONFIG_SMP */ static inline int register_cpu_notifier(struct notifier_block *nb) { @@ -87,10 +88,16 @@ static inline void cpu_hotplug_init(void) { } +static inline void cpu_maps_update_begin(void) +{ +} + +static inline void cpu_maps_update_done(void) +{ +} + #endif /* CONFIG_SMP */ extern struct sysdev_class cpu_sysdev_class; -extern void cpu_maps_update_begin(void); -extern void cpu_maps_update_done(void); #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 22c7ac5cd80c..6cd39a927e1f 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcore; #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) +static inline int is_kdump_kernel(void) +{ + return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0; +} +#else /* !CONFIG_CRASH_DUMP */ +static inline int is_kdump_kernel(void) { return 0; } #endif /* CONFIG_CRASH_DUMP */ + +extern unsigned long saved_max_pfn; #endif /* LINUX_CRASHDUMP_H */ diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index ab94bc083558..f352f06fa063 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -39,6 +39,8 @@ extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(void); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); +extern void __delayacct_freepages_start(void); +extern void __delayacct_freepages_end(void); static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { @@ -107,6 +109,18 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) return 0; } +static inline void delayacct_freepages_start(void) +{ + if (current->delays) + __delayacct_freepages_start(); +} + +static inline void delayacct_freepages_end(void) +{ + if (current->delays) + __delayacct_freepages_end(); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -129,6 +143,11 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { return 0; } static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { return 0; } +static inline void delayacct_freepages_start(void) +{} +static inline void delayacct_freepages_end(void) +{} + #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff --git a/include/linux/dirent.h b/include/linux/dirent.h index 5d6023b87800..f072fb8d10a3 100644 --- a/include/linux/dirent.h +++ b/include/linux/dirent.h @@ -1,23 +1,6 @@ #ifndef _LINUX_DIRENT_H #define _LINUX_DIRENT_H -struct dirent { - long d_ino; - __kernel_off_t d_off; - unsigned short d_reclen; - char d_name[256]; /* We must not include limits.h! */ -}; - -struct dirent64 { - __u64 d_ino; - __s64 d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[256]; -}; - -#ifdef __KERNEL__ - struct linux_dirent64 { u64 d_ino; s64 d_off; @@ -26,7 +9,4 @@ struct linux_dirent64 { char d_name[0]; }; -#endif /* __KERNEL__ */ - - #endif diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 84cec2aa9f1e..2efe7b863cff 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -284,8 +284,8 @@ struct ext2_inode { #ifdef __hurd__ #define i_translator osd1.hurd1.h_i_translator -#define i_frag osd2.hurd2.h_i_frag; -#define i_fsize osd2.hurd2.h_i_fsize; +#define i_frag osd2.hurd2.h_i_frag +#define i_fsize osd2.hurd2.h_i_fsize #define i_uid_high osd2.hurd2.h_i_uid_high #define i_gid_high osd2.hurd2.h_i_gid_high #define i_author osd2.hurd2.h_i_author diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 36c540396377..80171ee89a22 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *); extern void ext3_dirty_inode(struct inode *); extern int ext3_change_inode_journal_flag(struct inode *, int); extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); +extern int ext3_can_truncate(struct inode *inode); extern void ext3_truncate (struct inode *); extern void ext3_set_inode_flags(struct inode *); extern void ext3_get_inode_flags(struct ext3_inode_info *); diff --git a/include/linux/fd1772.h b/include/linux/fd1772.h deleted file mode 100644 index 871d6e4c677e..000000000000 --- a/include/linux/fd1772.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _LINUX_FD1772REG_H -#define _LINUX_FD1772REG_H - -/* -** WD1772 stuff - originally from the M68K Linux - * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk) - */ - -/* register codes */ - -#define FDC1772SELREG_STP (0x80) /* command/status register */ -#define FDC1772SELREG_TRA (0x82) /* track register */ -#define FDC1772SELREG_SEC (0x84) /* sector register */ -#define FDC1772SELREG_DTA (0x86) /* data register */ - -/* register names for FDC1772_READ/WRITE macros */ - -#define FDC1772REG_CMD 0 -#define FDC1772REG_STATUS 0 -#define FDC1772REG_TRACK 2 -#define FDC1772REG_SECTOR 4 -#define FDC1772REG_DATA 6 - -/* command opcodes */ - -#define FDC1772CMD_RESTORE (0x00) /* - */ -#define FDC1772CMD_SEEK (0x10) /* | */ -#define FDC1772CMD_STEP (0x20) /* | TYP 1 Commands */ -#define FDC1772CMD_STIN (0x40) /* | */ -#define FDC1772CMD_STOT (0x60) /* - */ -#define FDC1772CMD_RDSEC (0x80) /* - TYP 2 Commands */ -#define FDC1772CMD_WRSEC (0xa0) /* - " */ -#define FDC1772CMD_RDADR (0xc0) /* - */ -#define FDC1772CMD_RDTRA (0xe0) /* | TYP 3 Commands */ -#define FDC1772CMD_WRTRA (0xf0) /* - */ -#define FDC1772CMD_FORCI (0xd0) /* - TYP 4 Command */ - -/* command modifier bits */ - -#define FDC1772CMDADD_SR6 (0x00) /* step rate settings */ -#define FDC1772CMDADD_SR12 (0x01) -#define FDC1772CMDADD_SR2 (0x02) -#define FDC1772CMDADD_SR3 (0x03) -#define FDC1772CMDADD_V (0x04) /* verify */ -#define FDC1772CMDADD_H (0x08) /* wait for spin-up */ -#define FDC1772CMDADD_U (0x10) /* update track register */ -#define FDC1772CMDADD_M (0x10) /* multiple sector access */ -#define FDC1772CMDADD_E (0x04) /* head settling flag */ -#define FDC1772CMDADD_P (0x02) /* precompensation */ -#define FDC1772CMDADD_A0 (0x01) /* DAM flag */ - -/* status register bits */ - -#define FDC1772STAT_MOTORON (0x80) /* motor on */ -#define FDC1772STAT_WPROT (0x40) /* write protected (FDC1772CMD_WR*) */ -#define FDC1772STAT_SPINUP (0x20) /* motor speed stable (Type I) */ -#define FDC1772STAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ -#define FDC1772STAT_RECNF (0x10) /* record not found */ -#define FDC1772STAT_CRC (0x08) /* CRC error */ -#define FDC1772STAT_TR00 (0x04) /* Track 00 flag (Type I) */ -#define FDC1772STAT_LOST (0x04) /* Lost Data (Type II+III) */ -#define FDC1772STAT_IDX (0x02) /* Index status (Type I) */ -#define FDC1772STAT_DRQ (0x02) /* DRQ status (Type II+III) */ -#define FDC1772STAT_BUSY (0x01) /* FDC1772 is busy */ - - -/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */ -#define DSKSIDE (0x01) - -#define DSKDRVNONE (0x06) -#define DSKDRV0 (0x02) -#define DSKDRV1 (0x04) - -/* step rates */ -#define FDC1772STEP_6 0x00 -#define FDC1772STEP_12 0x01 -#define FDC1772STEP_2 0x02 -#define FDC1772STEP_3 0x03 - -#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b86f806014c..49d8eb7a71be 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -886,6 +886,12 @@ static inline int file_check_writeable(struct file *filp) #define FL_SLEEP 128 /* A blocking lock */ /* + * Special return value from posix_lock_file() and vfs_lock_file() for + * asynchronous locking. + */ +#define FILE_LOCK_DEFERRED 1 + +/* * The POSIX file lock owner is determined by * the "struct files_struct" in the thread group * (or NULL for no owner - BSD locks). diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d48282197696..265635dc9908 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -104,11 +104,14 @@ struct fuse_file_lock { /** * INIT request/reply flags + * + * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_FILE_OPS (1 << 2) #define FUSE_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) /** diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e8787417f65a..118216f1bd3c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -541,7 +541,7 @@ extern dev_t blk_lookup_devt(const char *name, int part); extern char *disk_name (struct gendisk *hd, int part, char *buf); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); -extern void add_partition(struct gendisk *, int, sector_t, sector_t, int); +extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 98be6c5762b9..730a20b83576 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -79,6 +79,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) WARN_ON(1); } +static inline int gpio_export(unsigned gpio, bool direction_may_change) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return -EINVAL; +} + +static inline void gpio_unexport(unsigned gpio) +{ + /* GPIO can never have been exported */ + WARN_ON(1); +} + static inline int gpio_to_irq(unsigned gpio) { /* GPIO can never have been requested or set as input */ diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h new file mode 100644 index 000000000000..e10336631c62 --- /dev/null +++ b/include/linux/i2c/max732x.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_I2C_MAX732X_H +#define __LINUX_I2C_MAX732X_H + +/* platform data for the MAX732x 8/16-bit I/O expander driver */ + +struct max732x_platform_data { + /* number of the first GPIO */ + unsigned gpio_base; + + void *context; /* param to setup/teardown */ + + int (*setup)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); +}; +#endif /* __LINUX_I2C_MAX732X_H */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 9a2d762124de..fa035f96f2a3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -15,6 +15,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/init.h> +#include <linux/rcupdate.h> #if BITS_PER_LONG == 32 # define IDR_BITS 5 @@ -51,6 +52,7 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer *ary[1<<IDR_BITS]; int count; /* When zero, we can release it */ + struct rcu_head rcu_head; }; struct idr { @@ -71,6 +73,28 @@ struct idr { } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) +/* Actions to be taken after a call to _idr_sub_alloc */ +#define IDR_NEED_TO_GROW -2 +#define IDR_NOMORE_SPACE -3 + +#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC) + +/** + * idr synchronization (stolen from radix-tree.h) + * + * idr_find() is able to be called locklessly, using RCU. The caller must + * ensure calls to this function are made within rcu_read_lock() regions. + * Other readers (lock-free or otherwise) and modifications may be running + * concurrently. + * + * It is still required that the caller manage the synchronization and + * lifetimes of the items. So if RCU lock-free lookups are used, typically + * this would mean that the items have their own locks, or are amenable to + * lock-free access; and that the items are freed by RCU (or only freed after + * having been deleted from the idr tree *and* a synchronize_rcu() grace + * period). + */ + /* * This is what we export. */ diff --git a/include/linux/init.h b/include/linux/init.h index 21d658cdfa27..42ae95411a93 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -275,13 +275,7 @@ void __init parse_early_param(void); #define security_initcall(fn) module_init(fn) -/* These macros create a dummy inline: gcc 2.9x does not count alias - as usage, hence the `unused function' warning when __init functions - are declared static. We use the dummy __*_module_inline functions - both to kill the warning and check the type of the init/cleanup - function. */ - -/* Each module must use one module_init(), or one no_module_init */ +/* Each module must use one module_init(). */ #define module_init(initfn) \ static inline initcall_t __inittest(void) \ { return initfn; } \ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 93c45acf249a..021d8e720c79 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -122,7 +122,7 @@ extern struct group_info init_groups; .state = 0, \ .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ - .flags = 0, \ + .flags = PF_KTHREAD, \ .lock_depth = -1, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index ea6c18a8b0d4..ea330f9e7100 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -36,6 +36,7 @@ struct ipc_namespace { int msg_ctlmni; atomic_t msg_bytes; atomic_t msg_hdrs; + int auto_msgmni; size_t shm_ctlmax; size_t shm_ctlall; @@ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns; extern int register_ipcns_notifier(struct ipc_namespace *); extern int cond_register_ipcns_notifier(struct ipc_namespace *); -extern int unregister_ipcns_notifier(struct ipc_namespace *); +extern void unregister_ipcns_notifier(struct ipc_namespace *); extern int ipcns_notify(unsigned long); #else /* CONFIG_SYSVIPC */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 2b1c2e58566e..74bde13224c9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -11,6 +11,8 @@ #ifndef _LINUX_TRACE_IRQFLAGS_H #define _LINUX_TRACE_IRQFLAGS_H +#include <linux/typecheck.h> + #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_softirqs_on(unsigned long ip); extern void trace_softirqs_off(unsigned long ip); @@ -58,18 +60,24 @@ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) -#define local_irq_save(flags) \ - do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) +#define local_irq_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + raw_local_irq_save(flags); \ + trace_hardirqs_off(); \ + } while (0) -#define local_irq_restore(flags) \ - do { \ - if (raw_irqs_disabled_flags(flags)) { \ - raw_local_irq_restore(flags); \ - trace_hardirqs_off(); \ - } else { \ - trace_hardirqs_on(); \ - raw_local_irq_restore(flags); \ - } \ + +#define local_irq_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + trace_hardirqs_on(); \ + raw_local_irq_restore(flags); \ + } \ } while (0) #else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ /* @@ -78,8 +86,16 @@ */ # define raw_local_irq_disable() local_irq_disable() # define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) local_irq_save(flags) -# define raw_local_irq_restore(flags) local_irq_restore(flags) +# define raw_local_irq_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + local_irq_save(flags); \ + } while (0) +# define raw_local_irq_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + local_irq_restore(flags); \ + } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -89,7 +105,11 @@ raw_safe_halt(); \ } while (0) -#define local_save_flags(flags) raw_local_save_flags(flags) +#define local_save_flags(flags) \ + do { \ + typecheck(unsigned long, flags); \ + raw_local_save_flags(flags); \ + } while (0) #define irqs_disabled() \ ({ \ @@ -99,7 +119,11 @@ raw_irqs_disabled_flags(_flags); \ }) -#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +#define irqs_disabled_flags(flags) \ +({ \ + typecheck(unsigned long, flags); \ + raw_irqs_disabled_flags(flags); \ +}) #endif /* CONFIG_X86 */ #endif diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 00c1801099fa..57aefa160a92 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -6,6 +6,7 @@ #define _LINUX_KALLSYMS_H #include <linux/errno.h> +#include <linux/kernel.h> #include <linux/stddef.h> #define KSYM_NAME_LEN 128 @@ -105,18 +106,10 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr) print_symbol(fmt, (unsigned long)addr); } -#ifndef CONFIG_64BIT -#define print_ip_sym(ip) \ -do { \ - printk("[<%08lx>]", ip); \ - print_symbol(" %s\n", ip); \ -} while(0) -#else -#define print_ip_sym(ip) \ -do { \ - printk("[<%016lx>]", ip); \ - print_symbol(" %s\n", ip); \ -} while(0) -#endif +static inline void print_ip_sym(unsigned long ip) +{ + printk("[<%p>]", (void *) ip); + print_symbol(" %s\n", ip); +} #endif /*_LINUX_KALLSYMS_H*/ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f9cd7a513f9c..fdbbf72ca2eb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -14,6 +14,8 @@ #include <linux/compiler.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/typecheck.h> +#include <linux/ratelimit.h> #include <asm/byteorder.h> #include <asm/bug.h> @@ -188,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int printk_ratelimit_jiffies; -extern int printk_ratelimit_burst; +extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); -extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst); -extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); #else @@ -203,8 +202,6 @@ static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) { return 0; } static inline int printk_ratelimit(void) { return 0; } -static inline int __printk_ratelimit(int ratelimit_jiffies, \ - int ratelimit_burst) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } @@ -441,26 +438,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte) const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) -/* - * Check at compile time that something is of a particular type. - * Always evaluates to 1 so you may use it easily in comparisons. - */ -#define typecheck(type,x) \ -({ type __dummy; \ - typeof(x) __dummy2; \ - (void)(&__dummy == &__dummy2); \ - 1; \ -}) - -/* - * Check at compile time that 'function' is a certain type, or is a pointer - * to that type (needs to use typedef for the function type.) - */ -#define typecheck_fn(type,function) \ -({ typeof(type) __tmp = function; \ - (void)__tmp; \ -}) - struct sysinfo; extern int do_sysinfo(struct sysinfo *info); diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 0509c4ce4857..a1a91577813c 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -19,6 +19,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include <linux/gfp.h> #include <linux/stddef.h> #include <linux/errno.h> #include <linux/compiler.h> @@ -41,8 +42,8 @@ struct file; struct subprocess_info; /* Allocate a subprocess_info structure */ -struct subprocess_info *call_usermodehelper_setup(char *path, - char **argv, char **envp); +struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, + char **envp, gfp_t gfp_mask); /* Set various pieces of state into the subprocess_info structure */ void call_usermodehelper_setkeys(struct subprocess_info *info, @@ -69,8 +70,9 @@ static inline int call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) { struct subprocess_info *info; + gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp); + info = call_usermodehelper_setup(path, argv, envp, gfp_mask); if (info == NULL) return -ENOMEM; return call_usermodehelper_exec(info, wait); @@ -81,8 +83,9 @@ call_usermodehelper_keys(char *path, char **argv, char **envp, struct key *session_keyring, enum umh_wait wait) { struct subprocess_info *info; + gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp); + info = call_usermodehelper_setup(path, argv, envp, gfp_mask); if (info == NULL) return -ENOMEM; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 04a3556bdea6..0be7795655fa 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -157,11 +157,10 @@ struct kretprobe { int nmissed; size_t data_size; struct hlist_head free_instances; - struct hlist_head used_instances; + spinlock_t lock; }; struct kretprobe_instance { - struct hlist_node uflist; /* either on free list or used list */ struct hlist_node hlist; struct kretprobe *rp; kprobe_opcode_t *ret_addr; @@ -201,7 +200,6 @@ static inline int init_test_probes(void) } #endif /* CONFIG_KPROBES_SANITY_TEST */ -extern spinlock_t kretprobe_lock; extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); @@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); +void kretprobe_hash_lock(struct task_struct *tsk, + struct hlist_head **head, unsigned long *flags); +void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags); struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); /* kprobe_running() will just return the current_kprobe on this CPU */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 00dd957e245b..aabc8a13ba71 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -6,7 +6,8 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), void *data, - const char namefmt[], ...); + const char namefmt[], ...) + __attribute__((format(printf, 3, 4))); /** * kthread_run - create and wake a thread. diff --git a/include/linux/list.h b/include/linux/list.h index 139ec41d9c2e..453916bc0412 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -61,14 +61,10 @@ extern void __list_add(struct list_head *new, * Insert a new entry after the specified head. * This is good for implementing stacks. */ -#ifndef CONFIG_DEBUG_LIST static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } -#else -extern void list_add(struct list_head *new, struct list_head *head); -#endif /** diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e6608776bc96..fdf3967e1397 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,7 +35,10 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_uncharge_page(struct page *page); +extern void mem_cgroup_uncharge_cache_page(struct page *page); extern void mem_cgroup_move_lists(struct page *page, bool active); +extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); + extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -50,9 +53,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); #define mm_match_cgroup(mm, cgroup) \ ((cgroup) == mem_cgroup_from_task((mm)->owner)) -extern int mem_cgroup_prepare_migration(struct page *page); +extern int +mem_cgroup_prepare_migration(struct page *page, struct page *newpage); extern void mem_cgroup_end_migration(struct page *page); -extern void mem_cgroup_page_migration(struct page *page, struct page *newpage); /* * For memory reclaim. @@ -97,6 +100,15 @@ static inline void mem_cgroup_uncharge_page(struct page *page) { } +static inline void mem_cgroup_uncharge_cache_page(struct page *page) +{ +} + +static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) +{ + return 0; +} + static inline void mem_cgroup_move_lists(struct page *page, bool active) { } @@ -112,7 +124,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task, return 1; } -static inline int mem_cgroup_prepare_migration(struct page *page) +static inline int +mem_cgroup_prepare_migration(struct page *page, struct page *newpage) { return 0; } @@ -121,11 +134,6 @@ static inline void mem_cgroup_end_migration(struct page *page) { } -static inline void -mem_cgroup_page_migration(struct page *page, struct page *newpage) -{ -} - static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) { return 0; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 02a27ae78539..746f975b58ef 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -159,6 +159,17 @@ struct vm_area_struct { #endif }; +struct core_thread { + struct task_struct *task; + struct core_thread *next; +}; + +struct core_state { + atomic_t nr_threads; + struct core_thread dumper; + struct completion startup; +}; + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -175,7 +186,6 @@ struct mm_struct { atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ int map_count; /* number of VMAs */ - int core_waiters; struct rw_semaphore mmap_sem; spinlock_t page_table_lock; /* Protects page tables and some counters */ @@ -219,8 +229,7 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ - /* coredumping support */ - struct completion *core_startup_done, core_done; + struct core_state *core_state; /* coredumping support */ /* aio bits */ rwlock_t ioctx_list_lock; /* aio lock */ diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 81cd36b735b0..ba63858056c7 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -2,11 +2,11 @@ #define _LINUX_MSDOS_FS_H #include <linux/magic.h> +#include <asm/byteorder.h> /* * The MS-DOS filesystem constants/structures */ -#include <asm/byteorder.h> #define SECTOR_SIZE 512 /* sector size (bytes) */ #define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */ @@ -89,24 +89,22 @@ #define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \ && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2) +struct __fat_dirent { + long d_ino; + __kernel_off_t d_off; + unsigned short d_reclen; + char d_name[256]; /* We must not include limits.h! */ +}; + /* * ioctl commands */ -#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2]) -#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct dirent [2]) +#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct __fat_dirent[2]) +#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct __fat_dirent[2]) /* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */ #define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32) #define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32) -/* - * vfat shortname flags - */ -#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ -#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ -#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ -#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ -#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ - struct fat_boot_sector { __u8 ignored[3]; /* Boot strap short or near jump */ __u8 system_id[8]; /* Name - can be used to special case @@ -168,14 +166,6 @@ struct msdos_dir_slot { __u8 name11_12[4]; /* last 2 characters in name */ }; -struct fat_slot_info { - loff_t i_pos; /* on-disk position of directory entry */ - loff_t slot_off; /* offset for slot or de start */ - int nr_slots; /* number of slots + 1(de) in filename */ - struct msdos_dir_entry *de; - struct buffer_head *bh; -}; - #ifdef __KERNEL__ #include <linux/buffer_head.h> @@ -184,6 +174,15 @@ struct fat_slot_info { #include <linux/fs.h> #include <linux/mutex.h> +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + struct fat_mount_options { uid_t fs_uid; gid_t fs_gid; @@ -202,10 +201,10 @@ struct fat_mount_options { utf8:1, /* Use of UTF-8 character set (Default) */ unicode_xlate:1, /* create escape sequences for unhandled Unicode */ numtail:1, /* Does first alias have a numeric '~1' type tail? */ - atari:1, /* Use Atari GEMDOS variation of MS-DOS fs */ flush:1, /* write things quickly */ nocase:1, /* Does this need case conversion? 0=need case conversion*/ - usefree:1; /* Use free_clusters for FAT32 */ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1; /* Filesystem timestamps are in UTC */ }; #define FAT_HASH_BITS 8 @@ -267,6 +266,14 @@ struct msdos_inode_info { struct inode vfs_inode; }; +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) { return sb->s_fs_info; @@ -428,8 +435,9 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date); +extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); +extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, + int tz_utc); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index f71201d0f3e7..6316fafe5c2a 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -45,13 +45,13 @@ enum { * @size: how many physical eraseblocks are reserved for this volume * @used_bytes: how many bytes of data this volume contains * @used_ebs: how many physical eraseblocks of this volume actually contain any - * data + * data * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) * @corrupted: non-zero if the volume is corrupted (static volumes only) * @upd_marker: non-zero if the volume has update marker set * @alignment: volume alignment * @usable_leb_size: how many bytes are available in logical eraseblocks of - * this volume + * this volume * @name_len: volume name length * @name: volume name * @cdev: UBI volume character device major and minor numbers @@ -152,6 +152,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum); int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum); int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum); +int ubi_sync(int ubi_num); /* * This function is the same as the 'ubi_leb_read()' function, but it does not diff --git a/include/linux/net.h b/include/linux/net.h index 2f999fbb188d..4a9a30f2d68f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> -extern int net_msg_cost; -extern int net_msg_burst; +extern struct ratelimit_state net_ratelimit_state; #endif #endif /* __KERNEL__ */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index a2861d95ecc3..108f47e5fd95 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -12,7 +12,6 @@ #include <linux/types.h> #include <linux/unistd.h> -#include <linux/dirent.h> #include <linux/fs.h> #include <linux/posix_acl.h> #include <linux/mount.h> diff --git a/include/linux/notifier.h b/include/linux/notifier.h index bd3d72ddf333..da2698b0fdd1 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -214,6 +214,8 @@ static inline int notifier_to_errno(int ret) #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, * not handling interrupts, soon dead */ +#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug + * lock is dropped */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 0e66b57631fc..c8a768e59640 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -82,9 +82,12 @@ static inline void get_nsproxy(struct nsproxy *ns) } #ifdef CONFIG_CGROUP_NS -int ns_cgroup_clone(struct task_struct *tsk); +int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid); #else -static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; } +static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid) +{ + return 0; +} #endif #endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 119ae7b8f028..c3b1761aba26 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2400,6 +2400,9 @@ #define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30 #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f +#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 +#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 #define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035 #define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036 diff --git a/include/linux/pid.h b/include/linux/pid.h index c21c7e8124a7..22921ac4cfd9 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -48,7 +48,7 @@ enum pid_type */ struct upid { - /* Try to keep pid_chain in the same cacheline as nr for find_pid */ + /* Try to keep pid_chain in the same cacheline as nr for find_vpid */ int nr; struct pid_namespace *ns; struct hlist_node pid_chain; @@ -57,10 +57,10 @@ struct upid { struct pid { atomic_t count; + unsigned int level; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct rcu_head rcu; - unsigned int level; struct upid numbers[1]; }; @@ -105,14 +105,12 @@ extern struct pid_namespace init_pid_ns; * or rcu_read_lock() held. * * find_pid_ns() finds the pid in the namespace specified - * find_pid() find the pid by its global id, i.e. in the init namespace * find_vpid() finr the pid by its virtual id, i.e. in the current namespace * - * see also find_task_by_pid() set in include/linux/sched.h + * see also find_task_by_vpid() set in include/linux/sched.h */ extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); -extern struct pid *find_pid(int nr); /* * Lookup a PID in the hash table, and return with it's count elevated. diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index caff5283d15c..1af82c4e17d4 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -14,6 +14,8 @@ struct pidmap { #define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) +struct bsd_acct_struct; + struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; @@ -25,6 +27,9 @@ struct pid_namespace { #ifdef CONFIG_PROC_FS struct vfsmount *proc_mnt; #endif +#ifdef CONFIG_BSD_PROCESS_ACCT + struct bsd_acct_struct *bacct; +#endif }; extern struct pid_namespace init_pid_ns; @@ -85,4 +90,7 @@ static inline struct task_struct *task_child_reaper(struct task_struct *tsk) return tsk->nsproxy->pid_ns->child_reaper; } +void pidhash_init(void); +void pidmap_init(void); + #endif /* _LINUX_PID_NS_H */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 15a9eaf4a802..f560d1705afe 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -79,6 +79,7 @@ struct proc_dir_entry { int pde_users; /* number of callers into module in progress */ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ struct completion *pde_unload_completion; + struct list_head pde_openers; /* who did ->open, but not ->release */ }; struct kcore_list { @@ -138,7 +139,6 @@ extern int proc_readdir(struct file *, void *, filldir_t); extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern const struct file_operations proc_kcore_operations; -extern const struct file_operations proc_kmsg_operations; extern const struct file_operations ppc_htab_operations; extern int pid_ns_prepare_proc(struct pid_namespace *ns); diff --git a/include/linux/profile.h b/include/linux/profile.h index 05c1cc736937..7e7087239af5 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -8,8 +8,6 @@ #include <asm/errno.h> -extern int prof_on __read_mostly; - #define CPU_PROFILING 1 #define SCHED_PROFILING 2 #define SLEEP_PROFILING 3 @@ -19,14 +17,31 @@ struct proc_dir_entry; struct pt_regs; struct notifier_block; +#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) +void create_prof_cpu_mask(struct proc_dir_entry *de); +#else +static inline void create_prof_cpu_mask(struct proc_dir_entry *de) +{ +} +#endif + +enum profile_type { + PROFILE_TASK_EXIT, + PROFILE_MUNMAP +}; + +#ifdef CONFIG_PROFILING + +extern int prof_on __read_mostly; + /* init basic kernel profiler */ void __init profile_init(void); -void profile_tick(int); +void profile_tick(int type); /* * Add multiple profiler hits to a given address: */ -void profile_hits(int, void *ip, unsigned int nr_hits); +void profile_hits(int type, void *ip, unsigned int nr_hits); /* * Single profiler hit: @@ -40,19 +55,6 @@ static inline void profile_hit(int type, void *ip) profile_hits(type, ip, 1); } -#ifdef CONFIG_PROC_FS -void create_prof_cpu_mask(struct proc_dir_entry *); -#else -#define create_prof_cpu_mask(x) do { (void)(x); } while (0) -#endif - -enum profile_type { - PROFILE_TASK_EXIT, - PROFILE_MUNMAP -}; - -#ifdef CONFIG_PROFILING - struct task_struct; struct mm_struct; @@ -80,6 +82,28 @@ struct pt_regs; #else +#define prof_on 0 + +static inline void profile_init(void) +{ + return; +} + +static inline void profile_tick(int type) +{ + return; +} + +static inline void profile_hits(int type, void *ip, unsigned int nr_hits) +{ + return; +} + +static inline void profile_hit(int type, void *ip) +{ + return; +} + static inline int task_handoff_register(struct notifier_block * n) { return -ENOSYS; diff --git a/include/linux/quota.h b/include/linux/quota.h index dcddfb200947..376a05048bc5 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -41,9 +41,6 @@ #define __DQUOT_VERSION__ "dquot_6.5.1" #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 -typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ -typedef __u64 qsize_t; /* Type in which we store sizes */ - /* Size of blocks in which are counted size limits */ #define QUOTABLOCK_BITS 10 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) @@ -138,6 +135,10 @@ struct if_dqinfo { #define QUOTA_NL_BHARDWARN 4 /* Block hardlimit reached */ #define QUOTA_NL_BSOFTLONGWARN 5 /* Block grace time expired */ #define QUOTA_NL_BSOFTWARN 6 /* Block softlimit reached */ +#define QUOTA_NL_IHARDBELOW 7 /* Usage got below inode hardlimit */ +#define QUOTA_NL_ISOFTBELOW 8 /* Usage got below inode softlimit */ +#define QUOTA_NL_BHARDBELOW 9 /* Usage got below block hardlimit */ +#define QUOTA_NL_BSOFTBELOW 10 /* Usage got below block softlimit */ enum { QUOTA_NL_C_UNSPEC, @@ -172,6 +173,9 @@ enum { #include <asm/atomic.h> +typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ +typedef __u64 qsize_t; /* Type in which we store sizes */ + extern spinlock_t dq_data_lock; /* Maximal numbers of writes for quota operation (insert/delete/update) @@ -223,12 +227,10 @@ struct super_block; #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ extern void mark_info_dirty(struct super_block *sb, int type); -#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) -#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list)) -#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) - -#define sb_dqopt(sb) (&(sb)->s_dquot) -#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) +static inline int info_dirty(struct mem_dqinfo *info) +{ + return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); +} struct dqstats { int lookups; @@ -337,19 +339,6 @@ struct quota_info { struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \ - (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) - -#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ - sb_has_quota_enabled(sb, GRPQUOTA)) - -#define sb_has_quota_suspended(sb, type) \ - ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \ - (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED)) - -#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \ - sb_has_quota_suspended(sb, GRPQUOTA)) - int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index f86702053853..742187f7a05c 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -11,42 +11,85 @@ #define _LINUX_QUOTAOPS_ #include <linux/smp_lock.h> - #include <linux/fs.h> +static inline struct quota_info *sb_dqopt(struct super_block *sb) +{ + return &sb->s_dquot; +} + #if defined(CONFIG_QUOTA) /* * declaration of quota_function calls in kernel. */ -extern void sync_dquots(struct super_block *sb, int type); - -extern int dquot_initialize(struct inode *inode, int type); -extern int dquot_drop(struct inode *inode); - -extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); -extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); - -extern int dquot_free_space(struct inode *inode, qsize_t number); -extern int dquot_free_inode(const struct inode *inode, unsigned long number); - -extern int dquot_transfer(struct inode *inode, struct iattr *iattr); -extern int dquot_commit(struct dquot *dquot); -extern int dquot_acquire(struct dquot *dquot); -extern int dquot_release(struct dquot *dquot); -extern int dquot_commit_info(struct super_block *sb, int type); -extern int dquot_mark_dquot_dirty(struct dquot *dquot); - -extern int vfs_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount); -extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, - int format_id, int type); -extern int vfs_quota_off(struct super_block *sb, int type, int remount); -extern int vfs_quota_sync(struct super_block *sb, int type); -extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); -extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +void sync_dquots(struct super_block *sb, int type); + +int dquot_initialize(struct inode *inode, int type); +int dquot_drop(struct inode *inode); + +int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); +int dquot_alloc_inode(const struct inode *inode, unsigned long number); + +int dquot_free_space(struct inode *inode, qsize_t number); +int dquot_free_inode(const struct inode *inode, unsigned long number); + +int dquot_transfer(struct inode *inode, struct iattr *iattr); +int dquot_commit(struct dquot *dquot); +int dquot_acquire(struct dquot *dquot); +int dquot_release(struct dquot *dquot); +int dquot_commit_info(struct super_block *sb, int type); +int dquot_mark_dquot_dirty(struct dquot *dquot); + +int vfs_quota_on(struct super_block *sb, int type, int format_id, + char *path, int remount); +int vfs_quota_on_mount(struct super_block *sb, char *qf_name, + int format_id, int type); +int vfs_quota_off(struct super_block *sb, int type, int remount); +int vfs_quota_sync(struct super_block *sb, int type); +int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); + +void vfs_dq_drop(struct inode *inode); +int vfs_dq_transfer(struct inode *inode, struct iattr *iattr); +int vfs_dq_quota_on_remount(struct super_block *sb); + +static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) +{ + return sb_dqopt(sb)->info + type; +} + +/* + * Functions for checking status of quota + */ + +static inline int sb_has_quota_enabled(struct super_block *sb, int type) +{ + if (type == USRQUOTA) + return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED; + return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED; +} + +static inline int sb_any_quota_enabled(struct super_block *sb) +{ + return sb_has_quota_enabled(sb, USRQUOTA) || + sb_has_quota_enabled(sb, GRPQUOTA); +} + +static inline int sb_has_quota_suspended(struct super_block *sb, int type) +{ + if (type == USRQUOTA) + return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED; + return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED; +} + +static inline int sb_any_quota_suspended(struct super_block *sb) +{ + return sb_has_quota_suspended(sb, USRQUOTA) || + sb_has_quota_suspended(sb, GRPQUOTA); +} /* * Operations supported for diskquotas. @@ -59,38 +102,16 @@ extern struct quotactl_ops vfs_quotactl_ops; /* It is better to call this function outside of any transaction as it might * need a lot of space in journal for dquot structure allocation. */ -static inline void DQUOT_INIT(struct inode *inode) +static inline void vfs_dq_init(struct inode *inode) { BUG_ON(!inode->i_sb); if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) inode->i_sb->dq_op->initialize(inode, -1); } -/* The same as with DQUOT_INIT */ -static inline void DQUOT_DROP(struct inode *inode) -{ - /* Here we can get arbitrary inode from clear_inode() so we have - * to be careful. OTOH we don't need locking as quota operations - * are allowed to change only at mount time */ - if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op - && inode->i_sb->dq_op->drop) { - int cnt; - /* Test before calling to rule out calls from proc and such - * where we are not allowed to block. Note that this is - * actually reliable test even without the lock - the caller - * must assure that nobody can come after the DQUOT_DROP and - * add quota pointers back anyway */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (inode->i_dquot[cnt] != NODQUOT) - break; - if (cnt < MAXQUOTAS) - inode->i_sb->dq_op->drop(inode); - } -} - /* The following allocation/freeing/transfer functions *must* be called inside * a transaction (deadlocks possible otherwise) */ -static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ @@ -102,15 +123,15 @@ static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) return 0; } -static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) { int ret; - if (!(ret = DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr))) + if (!(ret = vfs_dq_prealloc_space_nodirty(inode, nr))) mark_inode_dirty(inode); return ret; } -static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ @@ -122,25 +143,25 @@ static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) return 0; } -static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) { int ret; - if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr))) + if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr))) mark_inode_dirty(inode); return ret; } -static inline int DQUOT_ALLOC_INODE(struct inode *inode) +static inline int vfs_dq_alloc_inode(struct inode *inode) { if (sb_any_quota_enabled(inode->i_sb)) { - DQUOT_INIT(inode); + vfs_dq_init(inode); if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) return 1; } return 0; } -static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_space(inode, nr); @@ -148,35 +169,25 @@ static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) inode_sub_bytes(inode, nr); } -static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) { - DQUOT_FREE_SPACE_NODIRTY(inode, nr); + vfs_dq_free_space_nodirty(inode, nr); mark_inode_dirty(inode); } -static inline void DQUOT_FREE_INODE(struct inode *inode) +static inline void vfs_dq_free_inode(struct inode *inode) { if (sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_inode(inode, 1); } -static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) -{ - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { - DQUOT_INIT(inode); - if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) - return 1; - } - return 0; -} - /* The following two functions cannot be called inside a transaction */ -static inline void DQUOT_SYNC(struct super_block *sb) +static inline void vfs_dq_sync(struct super_block *sb) { sync_dquots(sb, -1); } -static inline int DQUOT_OFF(struct super_block *sb, int remount) +static inline int vfs_dq_off(struct super_block *sb, int remount) { int ret = -ENOSYS; @@ -185,22 +196,27 @@ static inline int DQUOT_OFF(struct super_block *sb, int remount) return ret; } -static inline int DQUOT_ON_REMOUNT(struct super_block *sb) +#else + +static inline int sb_has_quota_enabled(struct super_block *sb, int type) { - int cnt; - int ret = 0, err; + return 0; +} - if (!sb->s_qcop || !sb->s_qcop->quota_on) - return -ENOSYS; - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); - if (err < 0 && !ret) - ret = err; - } - return ret; +static inline int sb_any_quota_enabled(struct super_block *sb) +{ + return 0; } -#else +static inline int sb_has_quota_suspended(struct super_block *sb, int type) +{ + return 0; +} + +static inline int sb_any_quota_suspended(struct super_block *sb) +{ + return 0; +} /* * NO-OP when quota not configured. @@ -208,113 +224,144 @@ static inline int DQUOT_ON_REMOUNT(struct super_block *sb) #define sb_dquot_ops (NULL) #define sb_quotactl_ops (NULL) -static inline void DQUOT_INIT(struct inode *inode) +static inline void vfs_dq_init(struct inode *inode) { } -static inline void DQUOT_DROP(struct inode *inode) +static inline void vfs_dq_drop(struct inode *inode) { } -static inline int DQUOT_ALLOC_INODE(struct inode *inode) +static inline int vfs_dq_alloc_inode(struct inode *inode) { return 0; } -static inline void DQUOT_FREE_INODE(struct inode *inode) +static inline void vfs_dq_free_inode(struct inode *inode) { } -static inline void DQUOT_SYNC(struct super_block *sb) +static inline void vfs_dq_sync(struct super_block *sb) { } -static inline int DQUOT_OFF(struct super_block *sb, int remount) +static inline int vfs_dq_off(struct super_block *sb, int remount) { return 0; } -static inline int DQUOT_ON_REMOUNT(struct super_block *sb) +static inline int vfs_dq_quota_on_remount(struct super_block *sb) { return 0; } -static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) +static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) { return 0; } -static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) { inode_add_bytes(inode, nr); return 0; } -static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) { - DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr); + vfs_dq_prealloc_space_nodirty(inode, nr); mark_inode_dirty(inode); return 0; } -static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) { inode_add_bytes(inode, nr); return 0; } -static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) { - DQUOT_ALLOC_SPACE_NODIRTY(inode, nr); + vfs_dq_alloc_space_nodirty(inode, nr); mark_inode_dirty(inode); return 0; } -static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { inode_sub_bytes(inode, nr); } -static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) { - DQUOT_FREE_SPACE_NODIRTY(inode, nr); + vfs_dq_free_space_nodirty(inode, nr); mark_inode_dirty(inode); } #endif /* CONFIG_QUOTA */ -static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr) { - return DQUOT_PREALLOC_SPACE_NODIRTY(inode, + return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr) { - return DQUOT_PREALLOC_SPACE(inode, + return vfs_dq_prealloc_space(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr) { - return DQUOT_ALLOC_SPACE_NODIRTY(inode, + return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr) { - return DQUOT_ALLOC_SPACE(inode, + return vfs_dq_alloc_space(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr) { - DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits); + vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr) { - DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits); + vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits); } +/* + * Define uppercase equivalents for compatibility with old function names + * Can go away when we think all users have been converted (15/04/2008) + */ +#define DQUOT_INIT(inode) vfs_dq_init(inode) +#define DQUOT_DROP(inode) vfs_dq_drop(inode) +#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \ + vfs_dq_prealloc_space_nodirty(inode, nr) +#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr) +#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \ + vfs_dq_alloc_space_nodirty(inode, nr) +#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr) +#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \ + vfs_dq_prealloc_block_nodirty(inode, nr) +#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr) +#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \ + vfs_dq_alloc_block_nodirty(inode, nr) +#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr) +#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode) +#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \ + vfs_dq_free_space_nodirty(inode, nr) +#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr) +#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \ + vfs_dq_free_block_nodirty(inode, nr) +#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr) +#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode) +#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr) +#define DQUOT_SYNC(sb) vfs_dq_sync(sb) +#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount) +#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb) + #endif /* _LINUX_QUOTAOPS_ */ diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h new file mode 100644 index 000000000000..18a5b9ba9d40 --- /dev/null +++ b/include/linux/ratelimit.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_RATELIMIT_H +#define _LINUX_RATELIMIT_H +#include <linux/param.h> + +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 + +struct ratelimit_state { + int interval; + int burst; + int printed; + int missed; + unsigned long begin; +}; + +#define DEFINE_RATELIMIT_STATE(name, interval, burst) \ + struct ratelimit_state name = {interval, burst,} + +extern int __ratelimit(struct ratelimit_state *rs); + +static inline int ratelimit(void) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + return __ratelimit(&rs); +} +#endif diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index f04b64eca636..0967f03b0705 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { + static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); + WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); } static inline void rcu_exit_nohz(void) { + static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); + WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), + &rs); } #else /* CONFIG_NO_HZ */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4aacaeecb56f..e9963af16cda 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -526,8 +526,8 @@ struct item_head { ** p is the array of __u32, i is the index into the array, v is the value ** to store there. */ -#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i))) -#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i)) +#define get_block_num(p, i) get_unaligned_le32((p) + (i)) +#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) // // in old version uniqueness field shows key type diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 336ee43ed7d8..315517e8bfa1 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -152,7 +152,7 @@ struct reiserfs_journal_list { atomic_t j_nonzerolen; atomic_t j_commit_left; atomic_t j_older_commits_done; /* all commits older than this on disk */ - struct semaphore j_commit_lock; + struct mutex j_commit_mutex; unsigned long j_trans_id; time_t j_timestamp; struct reiserfs_list_bitmap *j_list_bitmap; @@ -193,8 +193,8 @@ struct reiserfs_journal { struct buffer_head *j_header_bh; time_t j_trans_start_time; /* time this transaction started */ - struct semaphore j_lock; - struct semaphore j_flush_sem; + struct mutex j_mutex; + struct mutex j_flush_mutex; wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ atomic_t j_jlock; /* lock for j_join_wait */ int j_list_bitmap_index; /* number of next list bitmap to use */ diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 6d9e1fca098c..fdeadd9740dc 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -63,9 +63,14 @@ u64 res_counter_read_u64(struct res_counter *counter, int member); ssize_t res_counter_read(struct res_counter *counter, int member, const char __user *buf, size_t nbytes, loff_t *pos, int (*read_strategy)(unsigned long long val, char *s)); -ssize_t res_counter_write(struct res_counter *counter, int member, - const char __user *buf, size_t nbytes, loff_t *pos, - int (*write_strategy)(char *buf, unsigned long long *val)); + +typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val); + +int res_counter_memparse_write_strategy(const char *buf, + unsigned long long *res); + +int res_counter_write(struct res_counter *counter, int member, + const char *buffer, write_strategy_fn write_strategy); /* * the field descriptors. one for each member of res_counter @@ -95,8 +100,10 @@ void res_counter_init(struct res_counter *counter); * counter->limit _locked call expects the counter->lock to be taken */ -int res_counter_charge_locked(struct res_counter *counter, unsigned long val); -int res_counter_charge(struct res_counter *counter, unsigned long val); +int __must_check res_counter_charge_locked(struct res_counter *counter, + unsigned long val); +int __must_check res_counter_charge(struct res_counter *counter, + unsigned long val); /* * uncharge - tell that some portion of the resource is released @@ -151,4 +158,20 @@ static inline void res_counter_reset_failcnt(struct res_counter *cnt) cnt->failcnt = 0; spin_unlock_irqrestore(&cnt->lock, flags); } + +static inline int res_counter_set_limit(struct res_counter *cnt, + unsigned long long limit) +{ + unsigned long flags; + int ret = -EBUSY; + + spin_lock_irqsave(&cnt->lock, flags); + if (cnt->usage < limit) { + cnt->limit = limit; + ret = 0; + } + spin_unlock_irqrestore(&cnt->lock, flags); + return ret; +} + #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 6aca4a16e377..42036ffe6b00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -506,6 +506,10 @@ struct signal_struct { unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; +#ifdef CONFIG_TASK_XACCT + u64 rchar, wchar, syscr, syscw; +#endif + struct task_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads in the @@ -668,6 +672,10 @@ struct task_delay_info { /* io operations performed */ u32 swapin_count; /* total count of the number of swapin block */ /* io operations performed */ + + struct timespec freepages_start, freepages_end; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ }; #endif /* CONFIG_TASK_DELAY_ACCT */ @@ -1257,7 +1265,7 @@ struct task_struct { #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ - cputime_t acct_stimexpd;/* stime since last update */ + cputime_t acct_timexpd; /* stime + utime since last update */ #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; @@ -1496,7 +1504,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_KSWAPD 0x00040000 /* I am kswapd */ #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ @@ -1715,19 +1723,13 @@ extern struct pid_namespace init_pid_ns; * finds a task by its pid in the specified namespace * find_task_by_vpid(): * finds a task by its virtual pid - * find_task_by_pid(): - * finds a task by its global pid * - * see also find_pid() etc in include/linux/pid.h + * see also find_vpid() etc in include/linux/pid.h */ extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, struct pid_namespace *ns); -static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr) -{ - return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); -} extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); @@ -1800,7 +1802,6 @@ extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); extern void zap_other_threads(struct task_struct *p); -extern int kill_proc(pid_t, int, int); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); @@ -2054,9 +2055,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) if (!signal_pending(p)) return 0; - if (state & (__TASK_STOPPED | __TASK_TRACED)) - return 0; - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } diff --git a/include/linux/sem.h b/include/linux/sem.h index c8eaad9e4b72..1b191c176bcd 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -78,6 +78,7 @@ struct seminfo { #ifdef __KERNEL__ #include <asm/atomic.h> +#include <linux/rcupdate.h> struct task_struct; @@ -93,23 +94,19 @@ struct sem_array { time_t sem_otime; /* last semop time */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ - struct sem_queue *sem_pending; /* pending operations to be processed */ - struct sem_queue **sem_pending_last; /* last pending operation */ - struct sem_undo *undo; /* undo requests on this array */ + struct list_head sem_pending; /* pending operations to be processed */ + struct list_head list_id; /* undo requests on this array */ unsigned long sem_nsems; /* no. of semaphores in array */ }; /* One queue for each sleeping process in the system. */ struct sem_queue { - struct sem_queue * next; /* next entry in the queue */ - struct sem_queue ** prev; /* previous entry in the queue, *(q->prev) == q */ - struct task_struct* sleeper; /* this process */ - struct sem_undo * undo; /* undo structure */ + struct list_head list; /* queue of pending operations */ + struct task_struct *sleeper; /* this process */ + struct sem_undo *undo; /* undo structure */ int pid; /* process id of requesting process */ int status; /* completion status of operation */ - struct sem_array * sma; /* semaphore array for operations */ - int id; /* internal sem id */ - struct sembuf * sops; /* array of pending operations */ + struct sembuf *sops; /* array of pending operations */ int nsops; /* number of operations */ int alter; /* does the operation alter the array? */ }; @@ -118,8 +115,11 @@ struct sem_queue { * when the process exits. */ struct sem_undo { - struct sem_undo * proc_next; /* next entry on this process */ - struct sem_undo * id_next; /* next entry on this semaphore set */ + struct list_head list_proc; /* per-process list: all undos from one process. */ + /* rcu protected */ + struct rcu_head rcu; /* rcu struct for sem_undo() */ + struct sem_undo_list *ulp; /* sem_undo_list for the process */ + struct list_head list_id; /* per semaphore array list: all undos for one array */ int semid; /* semaphore set identifier */ short * semadj; /* array of adjustments, one per semaphore */ }; @@ -128,9 +128,9 @@ struct sem_undo { * that may be shared among all a CLONE_SYSVSEM task group. */ struct sem_undo_list { - atomic_t refcnt; - spinlock_t lock; - struct sem_undo *proc_list; + atomic_t refcnt; + spinlock_t lock; + struct list_head list_proc; }; struct sysv_sem { diff --git a/include/linux/sm501.h b/include/linux/sm501.h index b530fa6a1d34..214f93209b8c 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -46,24 +46,6 @@ extern unsigned long sm501_modify_reg(struct device *dev, unsigned long set, unsigned long clear); -/* sm501_gpio_set - * - * set the state of the given GPIO line -*/ - -extern void sm501_gpio_set(struct device *dev, - unsigned long gpio, - unsigned int to, - unsigned int dir); - -/* sm501_gpio_get - * - * get the state of the given GPIO line -*/ - -extern unsigned long sm501_gpio_get(struct device *dev, - unsigned long gpio); - /* Platform data definitions */ @@ -104,11 +86,19 @@ struct sm501_platdata_fb { struct sm501_platdata_fbsub *fb_pnl; }; -/* gpio i2c */ +/* gpio i2c + * + * Note, we have to pass in the bus number, as the number used will be + * passed to the i2c-gpio driver's platform_device.id, subsequently used + * to register the i2c bus. +*/ struct sm501_platdata_gpio_i2c { + unsigned int bus_num; unsigned int pin_sda; unsigned int pin_scl; + int udelay; + int timeout; }; /* sm501_initdata @@ -131,6 +121,7 @@ struct sm501_reg_init { #define SM501_USE_FBACCEL (1<<6) #define SM501_USE_AC97 (1<<7) #define SM501_USE_I2S (1<<8) +#define SM501_USE_GPIO (1<<9) #define SM501_USE_ALL (0xffffffff) @@ -157,6 +148,8 @@ struct sm501_init_gpio { struct sm501_reg_init gpio_ddr_high; }; +#define SM501_FLAG_SUSPEND_OFF (1<<4) + /* sm501_platdata * * This is passed with the platform device to allow the board @@ -170,6 +163,12 @@ struct sm501_platdata { struct sm501_init_gpio *init_gpiop; struct sm501_platdata_fb *fb; + int flags; + int gpio_base; + + int (*get_power)(struct device *dev); + int (*set_power)(struct device *dev, unsigned int on); + struct sm501_platdata_gpio_i2c *gpio_i2c; unsigned int gpio_i2c_nr; }; diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h index 2c5cd55f44ff..923cd8a247b1 100644 --- a/include/linux/smb_fs.h +++ b/include/linux/smb_fs.h @@ -43,18 +43,13 @@ static inline struct smb_inode_info *SMB_I(struct inode *inode) } /* macro names are short for word, double-word, long value (?) */ -#define WVAL(buf,pos) \ - (le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos))))) -#define DVAL(buf,pos) \ - (le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos))))) -#define LVAL(buf,pos) \ - (le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos))))) -#define WSET(buf,pos,val) \ - put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos))) -#define DSET(buf,pos,val) \ - put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos))) -#define LSET(buf,pos,val) \ - put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos))) +#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos))) +#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos))) +#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos))) + +#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos)) +#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos)) +#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos)) /* where to find the base of the SMB packet proper */ #define smb_base(buf) ((u8 *)(((u8 *)(buf))+4)) diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 835ddf47d45c..22ef107d7704 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -1,18 +1,25 @@ -/* FIXME driver should be able to handle all four slaves that - * can be hooked up to each chipselect, as well as IRQs... - */ +/* FIXME driver should be able to handle IRQs... */ + +struct mcp23s08_chip_info { + bool is_present; /* true iff populated */ + u8 pullups; /* BIT(x) means enable pullup x */ +}; struct mcp23s08_platform_data { - /* four slaves can share one SPI chipselect */ - u8 slave; + /* Four slaves (numbered 0..3) can share one SPI chipselect, and + * will provide 8..32 GPIOs using 1..4 gpio_chip instances. + */ + struct mcp23s08_chip_info chip[4]; - /* number assigned to the first GPIO */ + /* "base" is the number of the first GPIO. Dynamic assignment is + * not currently supported, and even if there are gaps in chip + * addressing the GPIO numbers are sequential .. so for example + * if only slaves 0 and 3 are present, their GPIOs range from + * base to base+15. + */ unsigned base; - /* pins with pullups */ - u8 pullups; - void *context; /* param to setup/teardown */ int (*setup)(struct spi_device *spi, diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d311a090fae7..61e5610ad165 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -46,6 +46,7 @@ * linux/spinlock.h: builds the final spin_*() APIs. */ +#include <linux/typecheck.h> #include <linux/preempt.h> #include <linux/linkage.h> #include <linux/compiler.h> @@ -191,23 +192,53 @@ do { \ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock) -#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock) -#define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock) +#define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _spin_lock_irqsave(lock); \ + } while (0) +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _read_lock_irqsave(lock); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _write_lock_irqsave(lock); \ + } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#define spin_lock_irqsave_nested(lock, flags, subclass) \ - flags = _spin_lock_irqsave_nested(lock, subclass) +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _spin_lock_irqsave_nested(lock, subclass); \ + } while (0) #else -#define spin_lock_irqsave_nested(lock, flags, subclass) \ - flags = _spin_lock_irqsave(lock) +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _spin_lock_irqsave(lock); \ + } while (0) #endif #else -#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags) -#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags) -#define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags) +#define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _spin_lock_irqsave(lock, flags); \ + } while (0) +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _read_lock_irqsave(lock, flags); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _write_lock_irqsave(lock, flags); \ + } while (0) #define spin_lock_irqsave_nested(lock, flags, subclass) \ spin_lock_irqsave(lock, flags) @@ -260,16 +291,25 @@ do { \ } while (0) #endif -#define spin_unlock_irqrestore(lock, flags) \ - _spin_unlock_irqrestore(lock, flags) +#define spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _spin_unlock_irqrestore(lock, flags); \ + } while (0) #define spin_unlock_bh(lock) _spin_unlock_bh(lock) -#define read_unlock_irqrestore(lock, flags) \ - _read_unlock_irqrestore(lock, flags) +#define read_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _read_unlock_irqrestore(lock, flags); \ + } while (0) #define read_unlock_bh(lock) _read_unlock_bh(lock) -#define write_unlock_irqrestore(lock, flags) \ - _write_unlock_irqrestore(lock, flags) +#define write_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _write_unlock_irqrestore(lock, flags); \ + } while (0) #define write_unlock_bh(lock) _write_unlock_bh(lock) #define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 5d69c0744fff..18269e956a71 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -31,7 +31,7 @@ */ -#define TASKSTATS_VERSION 6 +#define TASKSTATS_VERSION 7 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -157,6 +157,10 @@ struct taskstats { __u64 ac_utimescaled; /* utime scaled on frequency etc */ __u64 ac_stimescaled; /* stime scaled on frequency etc */ __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ + + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; }; diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h new file mode 100644 index 000000000000..eb5b74a575be --- /dev/null +++ b/include/linux/typecheck.h @@ -0,0 +1,24 @@ +#ifndef TYPECHECK_H_INCLUDED +#define TYPECHECK_H_INCLUDED + +/* + * Check at compile time that something is of a particular type. + * Always evaluates to 1 so you may use it easily in comparisons. + */ +#define typecheck(type,x) \ +({ type __dummy; \ + typeof(x) __dummy2; \ + (void)(&__dummy == &__dummy2); \ + 1; \ +}) + +/* + * Check at compile time that 'function' is a certain type, or is a pointer + * to that type (needs to use typedef for the function type.) + */ +#define typecheck_fn(type,function) \ +({ typeof(type) __tmp = function; \ + (void)__tmp; \ +}) + +#endif /* TYPECHECK_H_INCLUDED */ diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 747c3a49cdc9..c932390c6da0 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -330,7 +330,7 @@ extern int usb_string_id(struct usb_composite_dev *c); dev_vdbg(&(d)->gadget->dev , fmt , ## args) #define ERROR(d, fmt, args...) \ dev_err(&(d)->gadget->dev , fmt , ## args) -#define WARN(d, fmt, args...) \ +#define WARNING(d, fmt, args...) \ dev_warn(&(d)->gadget->dev , fmt , ## args) #define INFO(d, fmt, args...) \ dev_info(&(d)->gadget->dev , fmt , ## args) diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 8eff0b53910b..b3c4a60ceeb3 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include <linux/virtio_config.h> /* The ID for virtio console */ diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 979524ee75b7..c30c7bfbf39b 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include <linux/virtio_config.h> /* The ID for virtio_balloon */ diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 5f79a5f9de79..c1aef85243bf 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include <linux/virtio_config.h> /* The ID for virtio_block */ @@ -11,6 +13,7 @@ #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ #define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ struct virtio_blk_config { @@ -26,6 +29,8 @@ struct virtio_blk_config __u8 heads; __u8 sectors; } geometry; + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ + __u32 blk_size; } __attribute__((packed)); /* These two define direction. */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index f364bbf63c34..bf8ec283b232 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,5 +1,8 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ + /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can * store and access that space differently. */ @@ -15,6 +18,12 @@ /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 +/* Some virtio feature bits (currently bits 28 through 31) are reserved for the + * transport being used (eg. virtio_ring), the rest are per-device feature + * bits. */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + /* Do we get callbacks when the ring is completely used, even if we've * suppressed them? */ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 @@ -52,9 +61,10 @@ * @get_features: get the array of feature bits for this device. * vdev: the virtio_device * Returns the first 32 feature bits (all we currently need). - * @set_features: confirm what device features we'll be using. + * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * feature: the first 32 feature bits + * This gives the final feature bits for the device: it can change + * the dev->feature bits if it wants. */ struct virtio_config_ops { @@ -70,7 +80,7 @@ struct virtio_config_ops void (*callback)(struct virtqueue *)); void (*del_vq)(struct virtqueue *vq); u32 (*get_features)(struct virtio_device *vdev); - void (*set_features)(struct virtio_device *vdev, u32 features); + void (*finalize_features)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index ed2d4ead7eb7..19a0da0dba41 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -1,6 +1,8 @@ #ifndef _LINUX_VIRTIO_CONSOLE_H #define _LINUX_VIRTIO_CONSOLE_H #include <linux/virtio_config.h> +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ /* The ID for virtio console */ #define VIRTIO_ID_CONSOLE 3 diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 38c0571820fb..5e33761b9b8a 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include <linux/virtio_config.h> /* The ID for virtio_net */ diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index b3151659cf49..cdef35742932 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -9,9 +9,8 @@ * Authors: * Anthony Liguori <aliguori@us.ibm.com> * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index abe481ed990e..c4a598fb3826 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, void (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq)); void vring_del_virtqueue(struct virtqueue *vq); +/* Filter out transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev); irqreturn_t vring_interrupt(int irq, void *_vq); #endif /* __KERNEL__ */ diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h index 331afb6c9f62..1a85dab8a940 100644 --- a/include/linux/virtio_rng.h +++ b/include/linux/virtio_rng.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_RNG_H #define _LINUX_VIRTIO_RNG_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include <linux/virtio_config.h> /* The ID for virtio_rng */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 14d47120682b..5c158c477ac7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -201,6 +201,8 @@ extern int keventd_up(void); extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); +extern int flush_work(struct work_struct *work); + extern int cancel_work_sync(struct work_struct *work); /* diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index a7421f130cc0..ccdc562e444e 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -58,6 +58,13 @@ * device should be used. A &struct ubi_rsvol_req object has to be properly * filled and a pointer to it has to be passed to the IOCTL. * + * UBI volumes re-name + * ~~~~~~~~~~~~~~~~~~~ + * + * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command + * of the UBI character device should be used. A &struct ubi_rnvol_req object + * has to be properly filled and a pointer to it has to be passed to the IOCTL. + * * UBI volume update * ~~~~~~~~~~~~~~~~~ * @@ -104,6 +111,8 @@ #define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t) /* Re-size an UBI volume */ #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req) +/* Re-name volumes */ +#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req) /* IOCTL commands of the UBI control character device */ @@ -128,6 +137,9 @@ /* Maximum MTD device name length supported by UBI */ #define MAX_UBI_MTD_NAME_LEN 127 +/* Maximum amount of UBI volumes that can be re-named at one go */ +#define UBI_MAX_RNVOL 32 + /* * UBI data type hint constants. * @@ -176,20 +188,20 @@ enum { * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages. * * But in rare cases, if this optimizes things, the VID header may be placed to - * a different offset. For example, the boot-loader might do things faster if the - * VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. As - * the boot-loader would not normally need to read EC headers (unless it needs - * UBI in RW mode), it might be faster to calculate ECC. This is weird example, - * but it real-life example. So, in this example, @vid_hdr_offer would be - * 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes - * aligned, which is OK, as UBI is clever enough to realize this is 4th sub-page - * of the first page and add needed padding. + * a different offset. For example, the boot-loader might do things faster if + * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. + * As the boot-loader would not normally need to read EC headers (unless it + * needs UBI in RW mode), it might be faster to calculate ECC. This is weird + * example, but it real-life example. So, in this example, @vid_hdr_offer would + * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes + * aligned, which is OK, as UBI is clever enough to realize this is 4th + * sub-page of the first page and add needed padding. */ struct ubi_attach_req { int32_t ubi_num; int32_t mtd_num; int32_t vid_hdr_offset; - uint8_t padding[12]; + int8_t padding[12]; }; /** @@ -251,6 +263,48 @@ struct ubi_rsvol_req { } __attribute__ ((packed)); /** + * struct ubi_rnvol_req - volumes re-name request. + * @count: count of volumes to re-name + * @padding1: reserved for future, not used, has to be zeroed + * @vol_id: ID of the volume to re-name + * @name_len: name length + * @padding2: reserved for future, not used, has to be zeroed + * @name: new volume name + * + * UBI allows to re-name up to %32 volumes at one go. The count of volumes to + * re-name is specified in the @count field. The ID of the volumes to re-name + * and the new names are specified in the @vol_id and @name fields. + * + * The UBI volume re-name operation is atomic, which means that should power cut + * happen, the volumes will have either old name or new name. So the possible + * use-cases of this command is atomic upgrade. Indeed, to upgrade, say, volumes + * A and B one may create temporary volumes %A1 and %B1 with the new contents, + * then atomically re-name A1->A and B1->B, in which case old %A and %B will + * be removed. + * + * If it is not desirable to remove old A and B, the re-name request has to + * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1 + * become A and B, and old A and B will become A1 and B1. + * + * It is also OK to request: A1->A, A1->X, B1->B, B->Y, in which case old A1 + * and B1 become A and B, and old A and B become X and Y. + * + * In other words, in case of re-naming into an existing volume name, the + * existing volume is removed, unless it is re-named as well at the same + * re-name request. + */ +struct ubi_rnvol_req { + int32_t count; + int8_t padding1[12]; + struct { + int32_t vol_id; + int16_t name_len; + int8_t padding2[2]; + char name[UBI_MAX_VOLUME_NAME + 1]; + } ents[UBI_MAX_RNVOL]; +} __attribute__ ((packed)); + +/** * struct ubi_leb_change_req - a data structure used in atomic logical * eraseblock change requests. * @lnum: logical eraseblock number to change @@ -261,8 +315,8 @@ struct ubi_rsvol_req { struct ubi_leb_change_req { int32_t lnum; int32_t bytes; - uint8_t dtype; - uint8_t padding[7]; + int8_t dtype; + int8_t padding[7]; } __attribute__ ((packed)); #endif /* __UBI_USER_H__ */ diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index dfd8bf66ce27..d364fd594ea4 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -262,7 +262,7 @@ static inline int ieee80211_get_radiotap_len(unsigned char *data) struct ieee80211_radiotap_header *hdr = (struct ieee80211_radiotap_header *)data; - return le16_to_cpu(get_unaligned(&hdr->it_len)); + return get_unaligned_le16(&hdr->it_len); } #endif /* IEEE80211_RADIOTAP_H */ diff --git a/init/do_mounts.c b/init/do_mounts.c index a1de1bf3d6b9..f769fac4f4c0 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -12,6 +12,7 @@ #include <linux/device.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/initrd.h> #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 46dfd64ae8fb..fedef93b586f 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -10,8 +10,6 @@ #include "do_mounts.h" -#define BUILD_CRAMDISK - int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ static int __init prompt_ramdisk(char *str) @@ -162,14 +160,8 @@ int __init rd_load_image(char *from) goto done; if (nblocks == 0) { -#ifdef BUILD_CRAMDISK if (crd_load(in_fd, out_fd) == 0) goto successful_load; -#else - printk(KERN_NOTICE - "RAMDISK: Kernel does not support compressed " - "RAM disk images\n"); -#endif goto done; } @@ -267,8 +259,6 @@ int __init rd_load_disk(int n) return rd_load_image("/dev/root"); } -#ifdef BUILD_CRAMDISK - /* * gzip declarations */ @@ -313,32 +303,11 @@ static int crd_infd, crd_outfd; static int __init fill_inbuf(void); static void __init flush_window(void); -static void __init *malloc(size_t size); -static void __init free(void *where); static void __init error(char *m); -static void __init gzip_mark(void **); -static void __init gzip_release(void **); - -#include "../lib/inflate.c" -static void __init *malloc(size_t size) -{ - return kmalloc(size, GFP_KERNEL); -} - -static void __init free(void *where) -{ - kfree(where); -} - -static void __init gzip_mark(void **ptr) -{ -} - -static void __init gzip_release(void **ptr) -{ -} +#define NO_INFLATE_MALLOC +#include "../lib/inflate.c" /* =========================================================================== * Fill the input buffer. This is called only when the buffer is empty @@ -425,5 +394,3 @@ static int __init crd_load(int in_fd, int out_fd) kfree(window); return result; } - -#endif /* BUILD_CRAMDISK */ diff --git a/init/initramfs.c b/init/initramfs.c index 8eeeccb328c9..644fc01ad5f0 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -14,16 +14,6 @@ static void __init error(char *x) message = x; } -static void __init *malloc(size_t size) -{ - return kmalloc(size, GFP_KERNEL); -} - -static void __init free(void *where) -{ - kfree(where); -} - /* link hash */ #define N_ALIGN(len) ((((len) + 1) & ~3) + 2) @@ -407,18 +397,10 @@ static long bytes_out; static void __init flush_window(void); static void __init error(char *m); -static void __init gzip_mark(void **); -static void __init gzip_release(void **); -#include "../lib/inflate.c" +#define NO_INFLATE_MALLOC -static void __init gzip_mark(void **ptr) -{ -} - -static void __init gzip_release(void **ptr) -{ -} +#include "../lib/inflate.c" /* =========================================================================== * Write the output window window[0..outcnt-1] and update crc and bytes_out. diff --git a/init/main.c b/init/main.c index 2769dc031c62..0604cbcaf1e4 100644 --- a/init/main.c +++ b/init/main.c @@ -87,8 +87,6 @@ extern void init_IRQ(void); extern void fork_init(unsigned long); extern void mca_init(void); extern void sbus_init(void); -extern void pidhash_init(void); -extern void pidmap_init(void); extern void prio_tree_init(void); extern void radix_tree_init(void); extern void free_initmem(void); diff --git a/init/version.c b/init/version.c index 9d17d70ee02d..52a8b98642b8 100644 --- a/init/version.c +++ b/init/version.c @@ -13,10 +13,13 @@ #include <linux/utsrelease.h> #include <linux/version.h> +#ifndef CONFIG_KALLSYMS #define version(a) Version_ ## a #define version_string(a) version(a) +extern int version_string(LINUX_VERSION_CODE); int version_string(LINUX_VERSION_CODE); +#endif struct uts_namespace init_uts_ns = { .kref = { diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index d3497465cc0a..69bc85978ba0 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table) } /* - * Routine that is called when a tunable has successfully been changed by - * hand and it has a callback routine registered on the ipc namespace notifier - * chain: we don't want such tunables to be recomputed anymore upon memory - * add/remove or ipc namespace creation/removal. - * They can come back to a recomputable state by being set to a <0 value. + * Routine that is called when the file "auto_msgmni" has successfully been + * written. + * Two values are allowed: + * 0: unregister msgmni's callback routine from the ipc namespace notifier + * chain. This means that msgmni won't be recomputed anymore upon memory + * add/remove or ipc namespace creation/removal. + * 1: register back the callback routine. */ -static void tunable_set_callback(int val) +static void ipc_auto_callback(int val) { - if (val >= 0) + if (!val) unregister_ipcns_notifier(current->nsproxy->ipc_ns); else { /* @@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) - tunable_set_callback(*((int *)(ipc_table.data))); + /* + * Tunable has successfully been changed by hand. Disable its + * automatic adjustment. This simply requires unregistering + * the notifiers that trigger recalculation. + */ + unregister_ipcns_notifier(current->nsproxy->ipc_ns); return rc; } @@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, lenp, ppos); } +static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + size_t lenp_bef = *lenp; + int oldval; + int rc; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + oldval = *((int *)(ipc_table.data)); + + rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); + + if (write && !rc && lenp_bef == *lenp) { + int newval = *((int *)(ipc_table.data)); + /* + * The file "auto_msgmni" has correctly been set. + * React by (un)registering the corresponding tunable, if the + * value has changed. + */ + if (newval != oldval) + ipc_auto_callback(newval); + } + + return rc; +} + #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL #define proc_ipc_callback_dointvec NULL +#define proc_ipcauto_dointvec_minmax NULL #endif #ifdef CONFIG_SYSCTL_SYSCALL @@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, newlen); - if (newval && newlen && rc > 0) { + if (newval && newlen && rc > 0) /* * Tunable has successfully been changed from userland */ - int *data = get_ipc(table); - - tunable_set_callback(*data); - } + unregister_ipcns_notifier(current->nsproxy->ipc_ns); return rc; } @@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, #define sysctl_ipc_registered_data NULL #endif +static int zero; +static int one = 1; + static struct ctl_table ipc_kern_table[] = { { .ctl_name = KERN_SHMMAX, @@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = { .proc_handler = proc_ipc_dointvec, .strategy = sysctl_ipc_data, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "auto_msgmni", + .data = &init_ipc_ns.auto_msgmni, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_ipcauto_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, {} }; diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c index 70ff09183f7b..b9b31a4f77e1 100644 --- a/ipc/ipcns_notifier.c +++ b/ipc/ipcns_notifier.c @@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self, int register_ipcns_notifier(struct ipc_namespace *ns) { + int rc; + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); ns->ipcns_nb.notifier_call = ipcns_callback; ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; - return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); + rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); + if (!rc) + ns->auto_msgmni = 1; + return rc; } int cond_register_ipcns_notifier(struct ipc_namespace *ns) { + int rc; + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); ns->ipcns_nb.notifier_call = ipcns_callback; ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; - return blocking_notifier_chain_cond_register(&ipcns_chain, + rc = blocking_notifier_chain_cond_register(&ipcns_chain, &ns->ipcns_nb); + if (!rc) + ns->auto_msgmni = 1; + return rc; } -int unregister_ipcns_notifier(struct ipc_namespace *ns) +void unregister_ipcns_notifier(struct ipc_namespace *ns) { - return blocking_notifier_chain_unregister(&ipcns_chain, - &ns->ipcns_nb); + blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb); + ns->auto_msgmni = 0; } int ipcns_notify(unsigned long val) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3e84b958186b..1fdc2eb2f6d8 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -314,15 +314,11 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry) * through std routines) */ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, - size_t count, loff_t * off) + size_t count, loff_t *off) { struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode); char buffer[FILENT_SIZE]; - size_t slen; - loff_t o; - - if (!count) - return 0; + ssize_t ret; spin_lock(&info->lock); snprintf(buffer, sizeof(buffer), @@ -335,21 +331,14 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, pid_vnr(info->notify_owner)); spin_unlock(&info->lock); buffer[sizeof(buffer)-1] = '\0'; - slen = strlen(buffer)+1; - - o = *off; - if (o > slen) - return 0; - - if (o + count > slen) - count = slen - o; - if (copy_to_user(u_data, buffer + o, count)) - return -EFAULT; + ret = simple_read_from_buffer(u_data, count, off, buffer, + strlen(buffer)); + if (ret <= 0) + return ret; - *off = o + count; filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME; - return count; + return ret; } static int mqueue_flush_file(struct file *filp, fl_owner_t id) diff --git a/ipc/sem.c b/ipc/sem.c index e9418df5ff3e..bf1bc36cb7ee 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -272,9 +272,8 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) ns->used_sems += nsems; sma->sem_base = (struct sem *) &sma[1]; - /* sma->sem_pending = NULL; */ - sma->sem_pending_last = &sma->sem_pending; - /* sma->undo = NULL; */ + INIT_LIST_HEAD(&sma->sem_pending); + INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); sem_unlock(sma); @@ -331,38 +330,6 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/* Manage the doubly linked list sma->sem_pending as a FIFO: - * insert new queue elements at the tail sma->sem_pending_last. - */ -static inline void append_to_queue (struct sem_array * sma, - struct sem_queue * q) -{ - *(q->prev = sma->sem_pending_last) = q; - *(sma->sem_pending_last = &q->next) = NULL; -} - -static inline void prepend_to_queue (struct sem_array * sma, - struct sem_queue * q) -{ - q->next = sma->sem_pending; - *(q->prev = &sma->sem_pending) = q; - if (q->next) - q->next->prev = &q->next; - else /* sma->sem_pending_last == &sma->sem_pending */ - sma->sem_pending_last = &q->next; -} - -static inline void remove_from_queue (struct sem_array * sma, - struct sem_queue * q) -{ - *(q->prev) = q->next; - if (q->next) - q->next->prev = q->prev; - else /* sma->sem_pending_last == &q->next */ - sma->sem_pending_last = q->prev; - q->prev = NULL; /* mark as removed */ -} - /* * Determine whether a sequence of semaphore operations would succeed * all at once. Return 0 if yes, 1 if need to sleep, else return error code. @@ -438,16 +405,15 @@ static void update_queue (struct sem_array * sma) int error; struct sem_queue * q; - q = sma->sem_pending; - while(q) { + q = list_entry(sma->sem_pending.next, struct sem_queue, list); + while (&q->list != &sma->sem_pending) { error = try_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ if (error <= 0) { struct sem_queue *n; - remove_from_queue(sma,q); - q->status = IN_WAKEUP; + /* * Continue scanning. The next operation * that must be checked depends on the type of the @@ -458,11 +424,26 @@ static void update_queue (struct sem_array * sma) * for semaphore values to become 0. * - if the operation didn't modify the array, * then just continue. + * The order of list_del() and reading ->next + * is crucial: In the former case, the list_del() + * must be done first [because we might be the + * first entry in ->sem_pending], in the latter + * case the list_del() must be done last + * [because the list is invalid after the list_del()] */ - if (q->alter) - n = sma->sem_pending; - else - n = q->next; + if (q->alter) { + list_del(&q->list); + n = list_entry(sma->sem_pending.next, + struct sem_queue, list); + } else { + n = list_entry(q->list.next, struct sem_queue, + list); + list_del(&q->list); + } + + /* wake up the waiting thread */ + q->status = IN_WAKEUP; + wake_up_process(q->sleeper); /* hands-off: q will disappear immediately after * writing q->status. @@ -471,7 +452,7 @@ static void update_queue (struct sem_array * sma) q->status = error; q = n; } else { - q = q->next; + q = list_entry(q->list.next, struct sem_queue, list); } } } @@ -491,7 +472,7 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semncnt = 0; - for (q = sma->sem_pending; q; q = q->next) { + list_for_each_entry(q, &sma->sem_pending, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -503,13 +484,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) } return semncnt; } + static int count_semzcnt (struct sem_array * sma, ushort semnum) { int semzcnt; struct sem_queue * q; semzcnt = 0; - for (q = sma->sem_pending; q; q = q->next) { + list_for_each_entry(q, &sma->sem_pending, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -522,35 +504,41 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) return semzcnt; } +void free_un(struct rcu_head *head) +{ + struct sem_undo *un = container_of(head, struct sem_undo, rcu); + kfree(un); +} + /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex * remains locked on exit. */ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { - struct sem_undo *un; - struct sem_queue *q; + struct sem_undo *un, *tu; + struct sem_queue *q, *tq; struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); - /* Invalidate the existing undo structures for this semaphore set. - * (They will be freed without any further action in exit_sem() - * or during the next semop.) - */ - for (un = sma->undo; un; un = un->id_next) + /* Free the existing undo structures for this semaphore set. */ + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { + list_del(&un->list_id); + spin_lock(&un->ulp->lock); un->semid = -1; + list_del_rcu(&un->list_proc); + spin_unlock(&un->ulp->lock); + call_rcu(&un->rcu, free_un); + } /* Wake up all pending processes and let them fail with EIDRM. */ - q = sma->sem_pending; - while(q) { - struct sem_queue *n; - /* lazy remove_from_queue: we are killing the whole queue */ - q->prev = NULL; - n = q->next; + list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { + list_del(&q->list); + q->status = IN_WAKEUP; wake_up_process(q->sleeper); /* doesn't sleep */ smp_wmb(); q->status = -EIDRM; /* hands-off q */ - q = n; } /* Remove the semaphore set from the IDR */ @@ -763,9 +751,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) sma->sem_base[i].semval = sem_io[i]; - for (un = sma->undo; un; un = un->id_next) + + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) { for (i = 0; i < nsems; i++) un->semadj[i] = 0; + } sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ update_queue(sma); @@ -797,12 +788,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, { int val = arg.val; struct sem_undo *un; + err = -ERANGE; if (val > SEMVMX || val < 0) goto out_unlock; - for (un = sma->undo; un; un = un->id_next) + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; + curr->semval = val; curr->sempid = task_tgid_vnr(current); sma->sem_ctime = get_seconds(); @@ -952,6 +946,8 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) return -ENOMEM; spin_lock_init(&undo_list->lock); atomic_set(&undo_list->refcnt, 1); + INIT_LIST_HEAD(&undo_list->list_proc); + current->sysvsem.undo_list = undo_list; } *undo_listp = undo_list; @@ -960,25 +956,27 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) { - struct sem_undo **last, *un; + struct sem_undo *walk; - last = &ulp->proc_list; - un = *last; - while(un != NULL) { - if(un->semid==semid) - break; - if(un->semid==-1) { - *last=un->proc_next; - kfree(un); - } else { - last=&un->proc_next; - } - un=*last; + list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) { + if (walk->semid == semid) + return walk; } - return un; + return NULL; } -static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) +/** + * find_alloc_undo - Lookup (and if not present create) undo array + * @ns: namespace + * @semid: semaphore array id + * + * The function looks up (and if not present creates) the undo structure. + * The size of the undo structure depends on the size of the semaphore + * array, thus the alloc path is not that straightforward. + * Lifetime-rules: sem_undo is rcu-protected, on success, the function + * performs a rcu_read_lock(). + */ +static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) { struct sem_array *sma; struct sem_undo_list *ulp; @@ -990,13 +988,16 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) if (error) return ERR_PTR(error); + rcu_read_lock(); spin_lock(&ulp->lock); un = lookup_undo(ulp, semid); spin_unlock(&ulp->lock); if (likely(un!=NULL)) goto out; + rcu_read_unlock(); /* no undo structure around - allocate one. */ + /* step 1: figure out the size of the semaphore array */ sma = sem_lock_check(ns, semid); if (IS_ERR(sma)) return ERR_PTR(PTR_ERR(sma)); @@ -1004,37 +1005,45 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) nsems = sma->sem_nsems; sem_getref_and_unlock(sma); + /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { sem_putref(sma); return ERR_PTR(-ENOMEM); } - new->semadj = (short *) &new[1]; - new->semid = semid; - spin_lock(&ulp->lock); - un = lookup_undo(ulp, semid); - if (un) { - spin_unlock(&ulp->lock); - kfree(new); - sem_putref(sma); - goto out; - } + /* step 3: Acquire the lock on semaphore array */ sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma); - spin_unlock(&ulp->lock); kfree(new); un = ERR_PTR(-EIDRM); goto out; } - new->proc_next = ulp->proc_list; - ulp->proc_list = new; - new->id_next = sma->undo; - sma->undo = new; - sem_unlock(sma); + spin_lock(&ulp->lock); + + /* + * step 4: check for races: did someone else allocate the undo struct? + */ + un = lookup_undo(ulp, semid); + if (un) { + kfree(new); + goto success; + } + /* step 5: initialize & link new undo structure */ + new->semadj = (short *) &new[1]; + new->ulp = ulp; + new->semid = semid; + assert_spin_locked(&ulp->lock); + list_add_rcu(&new->list_proc, &ulp->list_proc); + assert_spin_locked(&sma->sem_perm.lock); + list_add(&new->list_id, &sma->list_id); un = new; + +success: spin_unlock(&ulp->lock); + rcu_read_lock(); + sem_unlock(sma); out: return un; } @@ -1090,9 +1099,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, alter = 1; } -retry_undos: if (undos) { - un = find_undo(ns, semid); + un = find_alloc_undo(ns, semid); if (IS_ERR(un)) { error = PTR_ERR(un); goto out_free; @@ -1102,19 +1110,37 @@ retry_undos: sma = sem_lock_check(ns, semid); if (IS_ERR(sma)) { + if (un) + rcu_read_unlock(); error = PTR_ERR(sma); goto out_free; } /* - * semid identifiers are not unique - find_undo may have + * semid identifiers are not unique - find_alloc_undo may have * allocated an undo structure, it was invalidated by an RMID - * and now a new array with received the same id. Check and retry. + * and now a new array with received the same id. Check and fail. + * This case can be detected checking un->semid. The existance of + * "un" itself is guaranteed by rcu. */ - if (un && un->semid == -1) { - sem_unlock(sma); - goto retry_undos; + error = -EIDRM; + if (un) { + if (un->semid == -1) { + rcu_read_unlock(); + goto out_unlock_free; + } else { + /* + * rcu lock can be released, "un" cannot disappear: + * - sem_lock is acquired, thus IPC_RMID is + * impossible. + * - exit_sem is impossible, it always operates on + * current (or a dead task). + */ + + rcu_read_unlock(); + } } + error = -EFBIG; if (max >= sma->sem_nsems) goto out_unlock_free; @@ -1138,17 +1164,15 @@ retry_undos: * task into the pending queue and go to sleep. */ - queue.sma = sma; queue.sops = sops; queue.nsops = nsops; queue.undo = un; queue.pid = task_tgid_vnr(current); - queue.id = semid; queue.alter = alter; if (alter) - append_to_queue(sma ,&queue); + list_add_tail(&queue.list, &sma->sem_pending); else - prepend_to_queue(sma ,&queue); + list_add(&queue.list, &sma->sem_pending); queue.status = -EINTR; queue.sleeper = current; @@ -1174,7 +1198,6 @@ retry_undos: sma = sem_lock(ns, semid); if (IS_ERR(sma)) { - BUG_ON(queue.prev != NULL); error = -EIDRM; goto out_free; } @@ -1192,7 +1215,7 @@ retry_undos: */ if (timeout && jiffies_left == 0) error = -EAGAIN; - remove_from_queue(sma,&queue); + list_del(&queue.list); goto out_unlock_free; out_unlock_free: @@ -1243,56 +1266,62 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) */ void exit_sem(struct task_struct *tsk) { - struct sem_undo_list *undo_list; - struct sem_undo *u, **up; - struct ipc_namespace *ns; + struct sem_undo_list *ulp; - undo_list = tsk->sysvsem.undo_list; - if (!undo_list) + ulp = tsk->sysvsem.undo_list; + if (!ulp) return; tsk->sysvsem.undo_list = NULL; - if (!atomic_dec_and_test(&undo_list->refcnt)) + if (!atomic_dec_and_test(&ulp->refcnt)) return; - ns = tsk->nsproxy->ipc_ns; - /* There's no need to hold the semundo list lock, as current - * is the last task exiting for this undo list. - */ - for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) { + for (;;) { struct sem_array *sma; - int nsems, i; - struct sem_undo *un, **unp; + struct sem_undo *un; int semid; - - semid = u->semid; + int i; - if(semid == -1) - continue; - sma = sem_lock(ns, semid); + rcu_read_lock(); + un = list_entry(rcu_dereference(ulp->list_proc.next), + struct sem_undo, list_proc); + if (&un->list_proc == &ulp->list_proc) + semid = -1; + else + semid = un->semid; + rcu_read_unlock(); + + if (semid == -1) + break; + + sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); + + /* exit_sem raced with IPC_RMID, nothing to do */ if (IS_ERR(sma)) continue; - if (u->semid == -1) - goto next_entry; + un = lookup_undo(ulp, semid); + if (un == NULL) { + /* exit_sem raced with IPC_RMID+semget() that created + * exactly the same semid. Nothing to do. + */ + sem_unlock(sma); + continue; + } - BUG_ON(sem_checkid(sma, u->semid)); + /* remove un from the linked lists */ + assert_spin_locked(&sma->sem_perm.lock); + list_del(&un->list_id); - /* remove u from the sma->undo list */ - for (unp = &sma->undo; (un = *unp); unp = &un->id_next) { - if (u == un) - goto found; - } - printk ("exit_sem undo list error id=%d\n", u->semid); - goto next_entry; -found: - *unp = un->id_next; - /* perform adjustments registered in u */ - nsems = sma->sem_nsems; - for (i = 0; i < nsems; i++) { + spin_lock(&ulp->lock); + list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); + + /* perform adjustments registered in un */ + for (i = 0; i < sma->sem_nsems; i++) { struct sem * semaphore = &sma->sem_base[i]; - if (u->semadj[i]) { - semaphore->semval += u->semadj[i]; + if (un->semadj[i]) { + semaphore->semval += un->semadj[i]; /* * Range checks of the new semaphore value, * not defined by sus: @@ -1316,10 +1345,11 @@ found: sma->sem_otime = get_seconds(); /* maybe some queued-up processes were waiting for this */ update_queue(sma); -next_entry: sem_unlock(sma); + + call_rcu(&un->rcu, free_un); } - kfree(undo_list); + kfree(ulp); } #ifdef CONFIG_PROC_FS diff --git a/ipc/shm.c b/ipc/shm.c index a726aebce7d7..e77ec698cf40 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -112,23 +112,8 @@ void __init shm_init (void) } /* - * shm_lock_(check_)down routines are called in the paths where the rw_mutex - * is held to protect access to the idr tree. - */ -static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns, - int id) -{ - struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct shmid_kernel *)ipcp; - - return container_of(ipcp, struct shmid_kernel, shm_perm); -} - -/* * shm_lock_(check_) routines are called in the paths where the rw_mutex - * is not held. + * is not necessarily held. */ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) { @@ -211,7 +196,7 @@ static void shm_close(struct vm_area_struct *vma) down_write(&shm_ids(ns).rw_mutex); /* remove from the list of attaches of the shm segment */ - shp = shm_lock_down(ns, sfd->id); + shp = shm_lock(ns, sfd->id); BUG_ON(IS_ERR(shp)); shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); @@ -932,7 +917,7 @@ invalid: out_nattch: down_write(&shm_ids(ns).rw_mutex); - shp = shm_lock_down(ns, shmid); + shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; if(shp->shm_nattch == 0 && diff --git a/ipc/util.c b/ipc/util.c index 3339177b336c..49b3ea615dc5 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -688,10 +688,6 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) * Look for an id in the ipc ids idr and lock the associated ipc object. * * The ipc object is locked on exit. - * - * This is the routine that should be called when the rw_mutex is not already - * held, i.e. idr tree not protected: it protects the idr tree in read mode - * during the idr_find(). */ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) @@ -699,18 +695,13 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) struct kern_ipc_perm *out; int lid = ipcid_to_idx(id); - down_read(&ids->rw_mutex); - rcu_read_lock(); out = idr_find(&ids->ipcs_idr, lid); if (out == NULL) { rcu_read_unlock(); - up_read(&ids->rw_mutex); return ERR_PTR(-EINVAL); } - up_read(&ids->rw_mutex); - spin_lock(&out->lock); /* ipc_rmid() may have already freed the ID while ipc_lock @@ -725,56 +716,6 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) return out; } -/** - * ipc_lock_down - Lock an ipc structure with rw_sem held - * @ids: IPC identifier set - * @id: ipc id to look for - * - * Look for an id in the ipc ids idr and lock the associated ipc object. - * - * The ipc object is locked on exit. - * - * This is the routine that should be called when the rw_mutex is already - * held, i.e. idr tree protected. - */ - -struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id) -{ - struct kern_ipc_perm *out; - int lid = ipcid_to_idx(id); - - rcu_read_lock(); - out = idr_find(&ids->ipcs_idr, lid); - if (out == NULL) { - rcu_read_unlock(); - return ERR_PTR(-EINVAL); - } - - spin_lock(&out->lock); - - /* - * No need to verify that the structure is still valid since the - * rw_mutex is held. - */ - return out; -} - -struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id) -{ - struct kern_ipc_perm *out; - - out = ipc_lock_down(ids, id); - if (IS_ERR(out)) - return out; - - if (ipc_checkid(out, id)) { - ipc_unlock(out); - return ERR_PTR(-EIDRM); - } - - return out; -} - struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) { struct kern_ipc_perm *out; @@ -846,7 +787,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, int err; down_write(&ids->rw_mutex); - ipcp = ipc_lock_check_down(ids, id); + ipcp = ipc_lock_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); goto out_up; diff --git a/ipc/util.h b/ipc/util.h index cdb966aebe07..3646b45a03c9 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -102,11 +102,6 @@ void* ipc_rcu_alloc(int size); void ipc_rcu_getref(void *ptr); void ipc_rcu_putref(void *ptr); -/* - * ipc_lock_down: called with rw_mutex held - * ipc_lock: called without that lock held - */ -struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); @@ -155,7 +150,6 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm) rcu_read_unlock(); } -struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id); struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params); diff --git a/kernel/Makefile b/kernel/Makefile index 15ab63ffe64d..54f69837d35a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ +obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ @@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -mno-spe -pg endif +obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff --git a/kernel/acct.c b/kernel/acct.c index 91e1cfd734d2..dd68b9059418 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -75,37 +75,39 @@ int acct_parm[3] = {4, 2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(struct pid_namespace *ns, struct file *); +static void do_acct_process(struct bsd_acct_struct *acct, + struct pid_namespace *ns, struct file *); /* * This structure is used so that all the data protected by lock * can be placed in the same cache line as the lock. This primes * the cache line to have the data after getting the lock. */ -struct acct_glbs { - spinlock_t lock; +struct bsd_acct_struct { volatile int active; volatile int needcheck; struct file *file; struct pid_namespace *ns; struct timer_list timer; + struct list_head list; }; -static struct acct_glbs acct_globals __cacheline_aligned = - {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; +static DEFINE_SPINLOCK(acct_lock); +static LIST_HEAD(acct_list); /* * Called whenever the timer says to check the free space. */ -static void acct_timeout(unsigned long unused) +static void acct_timeout(unsigned long x) { - acct_globals.needcheck = 1; + struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x; + acct->needcheck = 1; } /* * Check the amount of free space and suspend/resume accordingly. */ -static int check_free_space(struct file *file) +static int check_free_space(struct bsd_acct_struct *acct, struct file *file) { struct kstatfs sbuf; int res; @@ -113,11 +115,11 @@ static int check_free_space(struct file *file) sector_t resume; sector_t suspend; - spin_lock(&acct_globals.lock); - res = acct_globals.active; - if (!file || !acct_globals.needcheck) + spin_lock(&acct_lock); + res = acct->active; + if (!file || !acct->needcheck) goto out; - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); /* May block */ if (vfs_statfs(file->f_path.dentry, &sbuf)) @@ -136,35 +138,35 @@ static int check_free_space(struct file *file) act = 0; /* - * If some joker switched acct_globals.file under us we'ld better be + * If some joker switched acct->file under us we'ld better be * silent and _not_ touch anything. */ - spin_lock(&acct_globals.lock); - if (file != acct_globals.file) { + spin_lock(&acct_lock); + if (file != acct->file) { if (act) res = act>0; goto out; } - if (acct_globals.active) { + if (acct->active) { if (act < 0) { - acct_globals.active = 0; + acct->active = 0; printk(KERN_INFO "Process accounting paused\n"); } } else { if (act > 0) { - acct_globals.active = 1; + acct->active = 1; printk(KERN_INFO "Process accounting resumed\n"); } } - del_timer(&acct_globals.timer); - acct_globals.needcheck = 0; - acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct_globals.timer); - res = acct_globals.active; + del_timer(&acct->timer); + acct->needcheck = 0; + acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; + add_timer(&acct->timer); + res = acct->active; out: - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); return res; } @@ -172,39 +174,41 @@ out: * Close the old accounting file (if currently open) and then replace * it with file (if non-NULL). * - * NOTE: acct_globals.lock MUST be held on entry and exit. + * NOTE: acct_lock MUST be held on entry and exit. */ -static void acct_file_reopen(struct file *file) +static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, + struct pid_namespace *ns) { struct file *old_acct = NULL; struct pid_namespace *old_ns = NULL; - if (acct_globals.file) { - old_acct = acct_globals.file; - old_ns = acct_globals.ns; - del_timer(&acct_globals.timer); - acct_globals.active = 0; - acct_globals.needcheck = 0; - acct_globals.file = NULL; + if (acct->file) { + old_acct = acct->file; + old_ns = acct->ns; + del_timer(&acct->timer); + acct->active = 0; + acct->needcheck = 0; + acct->file = NULL; + acct->ns = NULL; + list_del(&acct->list); } if (file) { - acct_globals.file = file; - acct_globals.ns = get_pid_ns(task_active_pid_ns(current)); - acct_globals.needcheck = 0; - acct_globals.active = 1; + acct->file = file; + acct->ns = ns; + acct->needcheck = 0; + acct->active = 1; + list_add(&acct->list, &acct_list); /* It's been deleted if it was used before so this is safe */ - init_timer(&acct_globals.timer); - acct_globals.timer.function = acct_timeout; - acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct_globals.timer); + setup_timer(&acct->timer, acct_timeout, (unsigned long)acct); + acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; + add_timer(&acct->timer); } if (old_acct) { mnt_unpin(old_acct->f_path.mnt); - spin_unlock(&acct_globals.lock); - do_acct_process(old_ns, old_acct); + spin_unlock(&acct_lock); + do_acct_process(acct, old_ns, old_acct); filp_close(old_acct, NULL); - put_pid_ns(old_ns); - spin_lock(&acct_globals.lock); + spin_lock(&acct_lock); } } @@ -212,6 +216,8 @@ static int acct_on(char *name) { struct file *file; int error; + struct pid_namespace *ns; + struct bsd_acct_struct *acct = NULL; /* Difference from BSD - they don't do O_APPEND */ file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); @@ -228,18 +234,34 @@ static int acct_on(char *name) return -EIO; } + ns = task_active_pid_ns(current); + if (ns->bacct == NULL) { + acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); + if (acct == NULL) { + filp_close(file, NULL); + return -ENOMEM; + } + } + error = security_acct(file); if (error) { + kfree(acct); filp_close(file, NULL); return error; } - spin_lock(&acct_globals.lock); + spin_lock(&acct_lock); + if (ns->bacct == NULL) { + ns->bacct = acct; + acct = NULL; + } + mnt_pin(file->f_path.mnt); - acct_file_reopen(file); - spin_unlock(&acct_globals.lock); + acct_file_reopen(ns->bacct, file, ns); + spin_unlock(&acct_lock); mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ + kfree(acct); return 0; } @@ -269,11 +291,17 @@ asmlinkage long sys_acct(const char __user *name) error = acct_on(tmp); putname(tmp); } else { + struct bsd_acct_struct *acct; + + acct = task_active_pid_ns(current)->bacct; + if (acct == NULL) + return 0; + error = security_acct(NULL); if (!error) { - spin_lock(&acct_globals.lock); - acct_file_reopen(NULL); - spin_unlock(&acct_globals.lock); + spin_lock(&acct_lock); + acct_file_reopen(acct, NULL, NULL); + spin_unlock(&acct_lock); } } return error; @@ -288,10 +316,16 @@ asmlinkage long sys_acct(const char __user *name) */ void acct_auto_close_mnt(struct vfsmount *m) { - spin_lock(&acct_globals.lock); - if (acct_globals.file && acct_globals.file->f_path.mnt == m) - acct_file_reopen(NULL); - spin_unlock(&acct_globals.lock); + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); +restart: + list_for_each_entry(acct, &acct_list, list) + if (acct->file && acct->file->f_path.mnt == m) { + acct_file_reopen(acct, NULL, NULL); + goto restart; + } + spin_unlock(&acct_lock); } /** @@ -303,12 +337,31 @@ void acct_auto_close_mnt(struct vfsmount *m) */ void acct_auto_close(struct super_block *sb) { - spin_lock(&acct_globals.lock); - if (acct_globals.file && - acct_globals.file->f_path.mnt->mnt_sb == sb) { - acct_file_reopen(NULL); + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); +restart: + list_for_each_entry(acct, &acct_list, list) + if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) { + acct_file_reopen(acct, NULL, NULL); + goto restart; + } + spin_unlock(&acct_lock); +} + +void acct_exit_ns(struct pid_namespace *ns) +{ + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); + acct = ns->bacct; + if (acct != NULL) { + if (acct->file != NULL) + acct_file_reopen(acct, NULL, NULL); + + kfree(acct); } - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); } /* @@ -425,7 +478,8 @@ static u32 encode_float(u64 value) /* * do_acct_process does all actual work. Caller holds the reference to file. */ -static void do_acct_process(struct pid_namespace *ns, struct file *file) +static void do_acct_process(struct bsd_acct_struct *acct, + struct pid_namespace *ns, struct file *file) { struct pacct_struct *pacct = ¤t->signal->pacct; acct_t ac; @@ -440,7 +494,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file) * First check to see if there is enough free_space to continue * the process accounting system. */ - if (!check_free_space(file)) + if (!check_free_space(acct, file)) return; /* @@ -577,34 +631,46 @@ void acct_collect(long exitcode, int group_dead) spin_unlock_irq(¤t->sighand->siglock); } -/** - * acct_process - now just a wrapper around do_acct_process - * @exitcode: task exit code - * - * handles process accounting for an exiting task - */ -void acct_process(void) +static void acct_process_in_ns(struct pid_namespace *ns) { struct file *file = NULL; - struct pid_namespace *ns; + struct bsd_acct_struct *acct; + acct = ns->bacct; /* * accelerate the common fastpath: */ - if (!acct_globals.file) + if (!acct || !acct->file) return; - spin_lock(&acct_globals.lock); - file = acct_globals.file; + spin_lock(&acct_lock); + file = acct->file; if (unlikely(!file)) { - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); return; } get_file(file); - ns = get_pid_ns(acct_globals.ns); - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); - do_acct_process(ns, file); + do_acct_process(acct, ns, file); fput(file); - put_pid_ns(ns); +} + +/** + * acct_process - now just a wrapper around acct_process_in_ns, + * which in turn is a wrapper around do_acct_process. + * + * handles process accounting for an exiting task + */ +void acct_process(void) +{ + struct pid_namespace *ns; + + /* + * This loop is safe lockless, since current is still + * alive and holds its namespace, which in turn holds + * its parent. + */ + for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) + acct_process_in_ns(ns); } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 15ac0e1e4f4d..66ec9fd21e0c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -89,11 +89,7 @@ struct cgroupfs_root { /* Hierarchy-specific flags */ unsigned long flags; - /* The path to use for release notifications. No locking - * between setting and use - so if userspace updates this - * while child cgroups exist, you could miss a - * notification. We ensure that it's always a valid - * NUL-terminated string */ + /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; }; @@ -118,7 +114,7 @@ static int root_count; * extra work in the fork/exit path if none of the subsystems need to * be called. */ -static int need_forkexit_callback; +static int need_forkexit_callback __read_mostly; static int need_mm_owner_callback __read_mostly; /* convenient tests for these bits */ @@ -220,7 +216,7 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) * task until after the first call to cgroup_iter_start(). This * reduces the fork()/exit() overhead for people who have cgroups * compiled into their kernel but not actually in use */ -static int use_task_css_set_links; +static int use_task_css_set_links __read_mostly; /* When we create or destroy a css_set, the operation simply * takes/releases a reference count on all the cgroups referenced @@ -241,17 +237,20 @@ static int use_task_css_set_links; */ static void unlink_css_set(struct css_set *cg) { + struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; + write_lock(&css_set_lock); hlist_del(&cg->hlist); css_set_count--; - while (!list_empty(&cg->cg_links)) { - struct cg_cgroup_link *link; - link = list_entry(cg->cg_links.next, - struct cg_cgroup_link, cg_link_list); + + list_for_each_entry_safe(link, saved_link, &cg->cg_links, + cg_link_list) { list_del(&link->cg_link_list); list_del(&link->cgrp_link_list); kfree(link); } + write_unlock(&css_set_lock); } @@ -363,15 +362,14 @@ static struct css_set *find_existing_css_set( static int allocate_cg_links(int count, struct list_head *tmp) { struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; int i; INIT_LIST_HEAD(tmp); for (i = 0; i < count; i++) { link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) { - while (!list_empty(tmp)) { - link = list_entry(tmp->next, - struct cg_cgroup_link, - cgrp_link_list); + list_for_each_entry_safe(link, saved_link, tmp, + cgrp_link_list) { list_del(&link->cgrp_link_list); kfree(link); } @@ -384,11 +382,10 @@ static int allocate_cg_links(int count, struct list_head *tmp) static void free_cg_links(struct list_head *tmp) { - while (!list_empty(tmp)) { - struct cg_cgroup_link *link; - link = list_entry(tmp->next, - struct cg_cgroup_link, - cgrp_link_list); + struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; + + list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) { list_del(&link->cgrp_link_list); kfree(link); } @@ -415,11 +412,11 @@ static struct css_set *find_css_set( /* First see if we already have a cgroup group that matches * the desired set */ - write_lock(&css_set_lock); + read_lock(&css_set_lock); res = find_existing_css_set(oldcg, cgrp, template); if (res) get_css_set(res); - write_unlock(&css_set_lock); + read_unlock(&css_set_lock); if (res) return res; @@ -507,10 +504,6 @@ static struct css_set *find_css_set( * knows that the cgroup won't be removed, as cgroup_rmdir() * needs that mutex. * - * The cgroup_common_file_write handler for operations that modify - * the cgroup hierarchy holds cgroup_mutex across the entire operation, - * single threading all such cgroup modifications across the system. - * * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't * (usually) take cgroup_mutex. These are the two most performance * critical pieces of code here. The exception occurs on cgroup_exit(), @@ -1093,6 +1086,8 @@ static void cgroup_kill_sb(struct super_block *sb) { struct cgroupfs_root *root = sb->s_fs_info; struct cgroup *cgrp = &root->top_cgroup; int ret; + struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; BUG_ON(!root); @@ -1112,10 +1107,9 @@ static void cgroup_kill_sb(struct super_block *sb) { * root cgroup */ write_lock(&css_set_lock); - while (!list_empty(&cgrp->css_sets)) { - struct cg_cgroup_link *link; - link = list_entry(cgrp->css_sets.next, - struct cg_cgroup_link, cgrp_link_list); + + list_for_each_entry_safe(link, saved_link, &cgrp->css_sets, + cgrp_link_list) { list_del(&link->cg_link_list); list_del(&link->cgrp_link_list); kfree(link); @@ -1281,18 +1275,14 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) } /* - * Attach task with pid 'pid' to cgroup 'cgrp'. Call with - * cgroup_mutex, may take task_lock of task + * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex + * held. May take task_lock of task */ -static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) +static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) { - pid_t pid; struct task_struct *tsk; int ret; - if (sscanf(pidbuf, "%d", &pid) != 1) - return -EIO; - if (pid) { rcu_read_lock(); tsk = find_task_by_vpid(pid); @@ -1318,6 +1308,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) return ret; } +static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) +{ + int ret; + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + ret = attach_task_by_pid(cgrp, pid); + cgroup_unlock(); + return ret; +} + /* The various types of files and directories in a cgroup file system */ enum cgroup_filetype { FILE_ROOT, @@ -1327,12 +1327,54 @@ enum cgroup_filetype { FILE_RELEASE_AGENT, }; +/** + * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. + * @cgrp: the cgroup to be checked for liveness + * + * On success, returns true; the lock should be later released with + * cgroup_unlock(). On failure returns false with no lock held. + */ +bool cgroup_lock_live_group(struct cgroup *cgrp) +{ + mutex_lock(&cgroup_mutex); + if (cgroup_is_removed(cgrp)) { + mutex_unlock(&cgroup_mutex); + return false; + } + return true; +} + +static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, + const char *buffer) +{ + BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + strcpy(cgrp->root->release_agent_path, buffer); + cgroup_unlock(); + return 0; +} + +static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *seq) +{ + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + seq_puts(seq, cgrp->root->release_agent_path); + seq_putc(seq, '\n'); + cgroup_unlock(); + return 0; +} + +/* A buffer size big enough for numbers or short strings */ +#define CGROUP_LOCAL_BUFFER_SIZE 64 + static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *userbuf, size_t nbytes, loff_t *unused_ppos) { - char buffer[64]; + char buffer[CGROUP_LOCAL_BUFFER_SIZE]; int retval = 0; char *end; @@ -1361,68 +1403,36 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, return retval; } -static ssize_t cgroup_common_file_write(struct cgroup *cgrp, - struct cftype *cft, - struct file *file, - const char __user *userbuf, - size_t nbytes, loff_t *unused_ppos) +static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *unused_ppos) { - enum cgroup_filetype type = cft->private; - char *buffer; + char local_buffer[CGROUP_LOCAL_BUFFER_SIZE]; int retval = 0; + size_t max_bytes = cft->max_write_len; + char *buffer = local_buffer; - if (nbytes >= PATH_MAX) + if (!max_bytes) + max_bytes = sizeof(local_buffer) - 1; + if (nbytes >= max_bytes) return -E2BIG; - - /* +1 for nul-terminator */ - buffer = kmalloc(nbytes + 1, GFP_KERNEL); - if (buffer == NULL) - return -ENOMEM; - - if (copy_from_user(buffer, userbuf, nbytes)) { - retval = -EFAULT; - goto out1; + /* Allocate a dynamic buffer if we need one */ + if (nbytes >= sizeof(local_buffer)) { + buffer = kmalloc(nbytes + 1, GFP_KERNEL); + if (buffer == NULL) + return -ENOMEM; } - buffer[nbytes] = 0; /* nul-terminate */ - strstrip(buffer); /* strip -just- trailing whitespace */ - - mutex_lock(&cgroup_mutex); + if (nbytes && copy_from_user(buffer, userbuf, nbytes)) + return -EFAULT; - /* - * This was already checked for in cgroup_file_write(), but - * check again now we're holding cgroup_mutex. - */ - if (cgroup_is_removed(cgrp)) { - retval = -ENODEV; - goto out2; - } - - switch (type) { - case FILE_TASKLIST: - retval = attach_task_by_pid(cgrp, buffer); - break; - case FILE_NOTIFY_ON_RELEASE: - clear_bit(CGRP_RELEASABLE, &cgrp->flags); - if (simple_strtoul(buffer, NULL, 10) != 0) - set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); - else - clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); - break; - case FILE_RELEASE_AGENT: - BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); - strcpy(cgrp->root->release_agent_path, buffer); - break; - default: - retval = -EINVAL; - goto out2; - } - - if (retval == 0) + buffer[nbytes] = 0; /* nul-terminate */ + strstrip(buffer); + retval = cft->write_string(cgrp, cft, buffer); + if (!retval) retval = nbytes; -out2: - mutex_unlock(&cgroup_mutex); -out1: - kfree(buffer); + if (buffer != local_buffer) + kfree(buffer); return retval; } @@ -1438,6 +1448,8 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, return cft->write(cgrp, cft, file, buf, nbytes, ppos); if (cft->write_u64 || cft->write_s64) return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); + if (cft->write_string) + return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos); if (cft->trigger) { int ret = cft->trigger(cgrp, (unsigned int)cft->private); return ret ? ret : nbytes; @@ -1450,7 +1462,7 @@ static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, char __user *buf, size_t nbytes, loff_t *ppos) { - char tmp[64]; + char tmp[CGROUP_LOCAL_BUFFER_SIZE]; u64 val = cft->read_u64(cgrp, cft); int len = sprintf(tmp, "%llu\n", (unsigned long long) val); @@ -1462,56 +1474,13 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, char __user *buf, size_t nbytes, loff_t *ppos) { - char tmp[64]; + char tmp[CGROUP_LOCAL_BUFFER_SIZE]; s64 val = cft->read_s64(cgrp, cft); int len = sprintf(tmp, "%lld\n", (long long) val); return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); } -static ssize_t cgroup_common_file_read(struct cgroup *cgrp, - struct cftype *cft, - struct file *file, - char __user *buf, - size_t nbytes, loff_t *ppos) -{ - enum cgroup_filetype type = cft->private; - char *page; - ssize_t retval = 0; - char *s; - - if (!(page = (char *)__get_free_page(GFP_KERNEL))) - return -ENOMEM; - - s = page; - - switch (type) { - case FILE_RELEASE_AGENT: - { - struct cgroupfs_root *root; - size_t n; - mutex_lock(&cgroup_mutex); - root = cgrp->root; - n = strnlen(root->release_agent_path, - sizeof(root->release_agent_path)); - n = min(n, (size_t) PAGE_SIZE); - strncpy(s, root->release_agent_path, n); - mutex_unlock(&cgroup_mutex); - s += n; - break; - } - default: - retval = -EINVAL; - goto out; - } - *s++ = '\n'; - - retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page); -out: - free_page((unsigned long)page); - return retval; -} - static ssize_t cgroup_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1569,6 +1538,7 @@ int cgroup_seqfile_release(struct inode *inode, struct file *file) static struct file_operations cgroup_seqfile_operations = { .read = seq_read, + .write = cgroup_file_write, .llseek = seq_lseek, .release = cgroup_seqfile_release, }; @@ -1756,15 +1726,11 @@ int cgroup_add_files(struct cgroup *cgrp, int cgroup_task_count(const struct cgroup *cgrp) { int count = 0; - struct list_head *l; + struct cg_cgroup_link *link; read_lock(&css_set_lock); - l = cgrp->css_sets.next; - while (l != &cgrp->css_sets) { - struct cg_cgroup_link *link = - list_entry(l, struct cg_cgroup_link, cgrp_link_list); + list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { count += atomic_read(&link->cg->ref.refcount); - l = l->next; } read_unlock(&css_set_lock); return count; @@ -2227,6 +2193,18 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, return notify_on_release(cgrp); } +static int cgroup_write_notify_on_release(struct cgroup *cgrp, + struct cftype *cft, + u64 val) +{ + clear_bit(CGRP_RELEASABLE, &cgrp->flags); + if (val) + set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + else + clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + return 0; +} + /* * for the common functions, 'private' gives the type of file */ @@ -2235,7 +2213,7 @@ static struct cftype files[] = { .name = "tasks", .open = cgroup_tasks_open, .read = cgroup_tasks_read, - .write = cgroup_common_file_write, + .write_u64 = cgroup_tasks_write, .release = cgroup_tasks_release, .private = FILE_TASKLIST, }, @@ -2243,15 +2221,16 @@ static struct cftype files[] = { { .name = "notify_on_release", .read_u64 = cgroup_read_notify_on_release, - .write = cgroup_common_file_write, + .write_u64 = cgroup_write_notify_on_release, .private = FILE_NOTIFY_ON_RELEASE, }, }; static struct cftype cft_release_agent = { .name = "release_agent", - .read = cgroup_common_file_read, - .write = cgroup_common_file_write, + .read_seq_string = cgroup_release_agent_show, + .write_string = cgroup_release_agent_write, + .max_write_len = PATH_MAX, .private = FILE_RELEASE_AGENT, }; @@ -2869,16 +2848,17 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) * cgroup_clone - clone the cgroup the given subsystem is attached to * @tsk: the task to be moved * @subsys: the given subsystem + * @nodename: the name for the new cgroup * * Duplicate the current cgroup in the hierarchy that the given * subsystem is attached to, and move this task into the new * child. */ -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) +int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, + char *nodename) { struct dentry *dentry; int ret = 0; - char nodename[MAX_CGROUP_TYPE_NAMELEN]; struct cgroup *parent, *child; struct inode *inode; struct css_set *cg; @@ -2903,8 +2883,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) cg = tsk->cgroups; parent = task_cgroup(tsk, subsys->subsys_id); - snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid); - /* Pin the hierarchy */ atomic_inc(&parent->root->sb->s_active); @@ -3078,27 +3056,24 @@ static void cgroup_release_agent(struct work_struct *work) while (!list_empty(&release_list)) { char *argv[3], *envp[3]; int i; - char *pathbuf; + char *pathbuf = NULL, *agentbuf = NULL; struct cgroup *cgrp = list_entry(release_list.next, struct cgroup, release_list); list_del_init(&cgrp->release_list); spin_unlock(&release_list_lock); pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!pathbuf) { - spin_lock(&release_list_lock); - continue; - } - - if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) { - kfree(pathbuf); - spin_lock(&release_list_lock); - continue; - } + if (!pathbuf) + goto continue_free; + if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) + goto continue_free; + agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); + if (!agentbuf) + goto continue_free; i = 0; - argv[i++] = cgrp->root->release_agent_path; - argv[i++] = (char *)pathbuf; + argv[i++] = agentbuf; + argv[i++] = pathbuf; argv[i] = NULL; i = 0; @@ -3112,8 +3087,10 @@ static void cgroup_release_agent(struct work_struct *work) * be a slow process */ mutex_unlock(&cgroup_mutex); call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); - kfree(pathbuf); mutex_lock(&cgroup_mutex); + continue_free: + kfree(pathbuf); + kfree(agentbuf); spin_lock(&release_list_lock); } spin_unlock(&release_list_lock); diff --git a/kernel/cpu.c b/kernel/cpu.c index 2cc409ce0a8f..10ba5f1004a5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -285,6 +285,11 @@ out_allowed: set_cpus_allowed_ptr(current, &old_allowed); out_release: cpu_hotplug_done(); + if (!err) { + if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod, + hcpu) == NOTIFY_BAD) + BUG(); + } return err; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d5738910c34c..91cf85b36dd5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -227,10 +227,6 @@ static struct cpuset top_cpuset = { * The task_struct fields mems_allowed and mems_generation may only * be accessed in the context of that task, so require no locks. * - * The cpuset_common_file_write handler for operations that modify - * the cpuset hierarchy holds cgroup_mutex across the entire operation, - * single threading all such cpuset modifications across the system. - * * The cpuset_common_file_read() handlers only hold callback_mutex across * small pieces of code, such as when reading out possibly multi-word * cpumasks and nodemasks. @@ -369,7 +365,7 @@ void cpuset_update_task_memory_state(void) my_cpusets_mem_gen = top_cpuset.mems_generation; } else { rcu_read_lock(); - my_cpusets_mem_gen = task_cs(current)->mems_generation; + my_cpusets_mem_gen = task_cs(tsk)->mems_generation; rcu_read_unlock(); } @@ -500,11 +496,16 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) /* * rebuild_sched_domains() * - * If the flag 'sched_load_balance' of any cpuset with non-empty - * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset - * which has that flag enabled, or if any cpuset with a non-empty - * 'cpus' is removed, then call this routine to rebuild the - * scheduler's dynamic sched domains. + * This routine will be called to rebuild the scheduler's dynamic + * sched domains: + * - if the flag 'sched_load_balance' of any cpuset with non-empty + * 'cpus' changes, + * - or if the 'cpus' allowed changes in any cpuset which has that + * flag enabled, + * - or if the 'sched_relax_domain_level' of any cpuset which has + * that flag enabled and with non-empty 'cpus' changes, + * - or if any cpuset with non-empty 'cpus' is removed, + * - or if a cpu gets offlined. * * This routine builds a partial partition of the systems CPUs * (the set of non-overlappping cpumask_t's in the array 'part' @@ -609,8 +610,13 @@ void rebuild_sched_domains(void) while (__kfifo_get(q, (void *)&cp, sizeof(cp))) { struct cgroup *cont; struct cpuset *child; /* scans child cpusets of cp */ + + if (cpus_empty(cp->cpus_allowed)) + continue; + if (is_sched_load_balance(cp)) csa[csn++] = cp; + list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { child = cgroup_cs(cont); __kfifo_put(q, (void *)&child, sizeof(cp)); @@ -703,36 +709,6 @@ done: /* Don't kfree(dattr) -- partition_sched_domains() does that. */ } -static inline int started_after_time(struct task_struct *t1, - struct timespec *time, - struct task_struct *t2) -{ - int start_diff = timespec_compare(&t1->start_time, time); - if (start_diff > 0) { - return 1; - } else if (start_diff < 0) { - return 0; - } else { - /* - * Arbitrarily, if two processes started at the same - * time, we'll say that the lower pointer value - * started first. Note that t2 may have exited by now - * so this may not be a valid pointer any longer, but - * that's fine - it still serves to distinguish - * between two tasks started (effectively) - * simultaneously. - */ - return t1 > t2; - } -} - -static inline int started_after(void *p1, void *p2) -{ - struct task_struct *t1 = p1; - struct task_struct *t2 = p2; - return started_after_time(t1, &t2->start_time, t2); -} - /** * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's * @tsk: task to test @@ -768,15 +744,49 @@ static void cpuset_change_cpumask(struct task_struct *tsk, } /** + * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. + * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed + * + * Called with cgroup_mutex held + * + * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, + * calling callback functions for each. + * + * Return 0 if successful, -errno if not. + */ +static int update_tasks_cpumask(struct cpuset *cs) +{ + struct cgroup_scanner scan; + struct ptr_heap heap; + int retval; + + /* + * cgroup_scan_tasks() will initialize heap->gt for us. + * heap_init() is still needed here for we should not change + * cs->cpus_allowed when heap_init() fails. + */ + retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); + if (retval) + return retval; + + scan.cg = cs->css.cgroup; + scan.test_task = cpuset_test_cpumask; + scan.process_task = cpuset_change_cpumask; + scan.heap = &heap; + retval = cgroup_scan_tasks(&scan); + + heap_free(&heap); + return retval; +} + +/** * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it * @cs: the cpuset to consider * @buf: buffer of cpu numbers written to this cpuset */ -static int update_cpumask(struct cpuset *cs, char *buf) +static int update_cpumask(struct cpuset *cs, const char *buf) { struct cpuset trialcs; - struct cgroup_scanner scan; - struct ptr_heap heap; int retval; int is_load_balanced; @@ -792,7 +802,6 @@ static int update_cpumask(struct cpuset *cs, char *buf) * that parsing. The validate_change() call ensures that cpusets * with tasks have cpus. */ - buf = strstrip(buf); if (!*buf) { cpus_clear(trialcs.cpus_allowed); } else { @@ -811,10 +820,6 @@ static int update_cpumask(struct cpuset *cs, char *buf) if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) return 0; - retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); - if (retval) - return retval; - is_load_balanced = is_sched_load_balance(&trialcs); mutex_lock(&callback_mutex); @@ -825,12 +830,9 @@ static int update_cpumask(struct cpuset *cs, char *buf) * Scan tasks in the cpuset, and update the cpumasks of any * that need an update. */ - scan.cg = cs->css.cgroup; - scan.test_task = cpuset_test_cpumask; - scan.process_task = cpuset_change_cpumask; - scan.heap = &heap; - cgroup_scan_tasks(&scan); - heap_free(&heap); + retval = update_tasks_cpumask(cs); + if (retval < 0) + return retval; if (is_load_balanced) rebuild_sched_domains(); @@ -886,74 +888,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, mutex_unlock(&callback_mutex); } -/* - * Handle user request to change the 'mems' memory placement - * of a cpuset. Needs to validate the request, update the - * cpusets mems_allowed and mems_generation, and for each - * task in the cpuset, rebind any vma mempolicies and if - * the cpuset is marked 'memory_migrate', migrate the tasks - * pages to the new memory. - * - * Call with cgroup_mutex held. May take callback_mutex during call. - * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, - * lock each such tasks mm->mmap_sem, scan its vma's and rebind - * their mempolicies to the cpusets new mems_allowed. - */ - static void *cpuset_being_rebound; -static int update_nodemask(struct cpuset *cs, char *buf) +/** + * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. + * @cs: the cpuset in which each task's mems_allowed mask needs to be changed + * @oldmem: old mems_allowed of cpuset cs + * + * Called with cgroup_mutex held + * Return 0 if successful, -errno if not. + */ +static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem) { - struct cpuset trialcs; - nodemask_t oldmem; struct task_struct *p; struct mm_struct **mmarray; int i, n, ntasks; int migrate; int fudge; - int retval; struct cgroup_iter it; - - /* - * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; - * it's read-only - */ - if (cs == &top_cpuset) - return -EACCES; - - trialcs = *cs; - - /* - * An empty mems_allowed is ok iff there are no tasks in the cpuset. - * Since nodelist_parse() fails on an empty mask, we special case - * that parsing. The validate_change() call ensures that cpusets - * with tasks have memory. - */ - buf = strstrip(buf); - if (!*buf) { - nodes_clear(trialcs.mems_allowed); - } else { - retval = nodelist_parse(buf, trialcs.mems_allowed); - if (retval < 0) - goto done; - - if (!nodes_subset(trialcs.mems_allowed, - node_states[N_HIGH_MEMORY])) - return -EINVAL; - } - oldmem = cs->mems_allowed; - if (nodes_equal(oldmem, trialcs.mems_allowed)) { - retval = 0; /* Too easy - nothing to do */ - goto done; - } - retval = validate_change(cs, &trialcs); - if (retval < 0) - goto done; - - mutex_lock(&callback_mutex); - cs->mems_allowed = trialcs.mems_allowed; - cs->mems_generation = cpuset_mems_generation++; - mutex_unlock(&callback_mutex); + int retval; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ @@ -1020,7 +973,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) - cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); + cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); mmput(mm); } @@ -1032,6 +985,70 @@ done: return retval; } +/* + * Handle user request to change the 'mems' memory placement + * of a cpuset. Needs to validate the request, update the + * cpusets mems_allowed and mems_generation, and for each + * task in the cpuset, rebind any vma mempolicies and if + * the cpuset is marked 'memory_migrate', migrate the tasks + * pages to the new memory. + * + * Call with cgroup_mutex held. May take callback_mutex during call. + * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, + * lock each such tasks mm->mmap_sem, scan its vma's and rebind + * their mempolicies to the cpusets new mems_allowed. + */ +static int update_nodemask(struct cpuset *cs, const char *buf) +{ + struct cpuset trialcs; + nodemask_t oldmem; + int retval; + + /* + * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; + * it's read-only + */ + if (cs == &top_cpuset) + return -EACCES; + + trialcs = *cs; + + /* + * An empty mems_allowed is ok iff there are no tasks in the cpuset. + * Since nodelist_parse() fails on an empty mask, we special case + * that parsing. The validate_change() call ensures that cpusets + * with tasks have memory. + */ + if (!*buf) { + nodes_clear(trialcs.mems_allowed); + } else { + retval = nodelist_parse(buf, trialcs.mems_allowed); + if (retval < 0) + goto done; + + if (!nodes_subset(trialcs.mems_allowed, + node_states[N_HIGH_MEMORY])) + return -EINVAL; + } + oldmem = cs->mems_allowed; + if (nodes_equal(oldmem, trialcs.mems_allowed)) { + retval = 0; /* Too easy - nothing to do */ + goto done; + } + retval = validate_change(cs, &trialcs); + if (retval < 0) + goto done; + + mutex_lock(&callback_mutex); + cs->mems_allowed = trialcs.mems_allowed; + cs->mems_generation = cpuset_mems_generation++; + mutex_unlock(&callback_mutex); + + retval = update_tasks_nodemask(cs, &oldmem); +done: + return retval; +} + int current_cpuset_is_being_rebound(void) { return task_cs(current) == cpuset_being_rebound; @@ -1044,7 +1061,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) if (val != cs->relax_domain_level) { cs->relax_domain_level = val; - rebuild_sched_domains(); + if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) + rebuild_sched_domains(); } return 0; @@ -1256,72 +1274,14 @@ typedef enum { FILE_SPREAD_SLAB, } cpuset_filetype_t; -static ssize_t cpuset_common_file_write(struct cgroup *cont, - struct cftype *cft, - struct file *file, - const char __user *userbuf, - size_t nbytes, loff_t *unused_ppos) -{ - struct cpuset *cs = cgroup_cs(cont); - cpuset_filetype_t type = cft->private; - char *buffer; - int retval = 0; - - /* Crude upper limit on largest legitimate cpulist user might write. */ - if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES)) - return -E2BIG; - - /* +1 for nul-terminator */ - buffer = kmalloc(nbytes + 1, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - if (copy_from_user(buffer, userbuf, nbytes)) { - retval = -EFAULT; - goto out1; - } - buffer[nbytes] = 0; /* nul-terminate */ - - cgroup_lock(); - - if (cgroup_is_removed(cont)) { - retval = -ENODEV; - goto out2; - } - - switch (type) { - case FILE_CPULIST: - retval = update_cpumask(cs, buffer); - break; - case FILE_MEMLIST: - retval = update_nodemask(cs, buffer); - break; - default: - retval = -EINVAL; - goto out2; - } - - if (retval == 0) - retval = nbytes; -out2: - cgroup_unlock(); -out1: - kfree(buffer); - return retval; -} - static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) { int retval = 0; struct cpuset *cs = cgroup_cs(cgrp); cpuset_filetype_t type = cft->private; - cgroup_lock(); - - if (cgroup_is_removed(cgrp)) { - cgroup_unlock(); + if (!cgroup_lock_live_group(cgrp)) return -ENODEV; - } switch (type) { case FILE_CPU_EXCLUSIVE: @@ -1367,12 +1327,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) struct cpuset *cs = cgroup_cs(cgrp); cpuset_filetype_t type = cft->private; - cgroup_lock(); - - if (cgroup_is_removed(cgrp)) { - cgroup_unlock(); + if (!cgroup_lock_live_group(cgrp)) return -ENODEV; - } + switch (type) { case FILE_SCHED_RELAX_DOMAIN_LEVEL: retval = update_relax_domain_level(cs, val); @@ -1386,6 +1343,32 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) } /* + * Common handling for a write to a "cpus" or "mems" file. + */ +static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + int retval = 0; + + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + + switch (cft->private) { + case FILE_CPULIST: + retval = update_cpumask(cgroup_cs(cgrp), buf); + break; + case FILE_MEMLIST: + retval = update_nodemask(cgroup_cs(cgrp), buf); + break; + default: + retval = -EINVAL; + break; + } + cgroup_unlock(); + return retval; +} + +/* * These ascii lists should be read in a single call, by using a user * buffer large enough to hold the entire map. If read in smaller * chunks, there is no guarantee of atomicity. Since the display format @@ -1504,14 +1487,16 @@ static struct cftype files[] = { { .name = "cpus", .read = cpuset_common_file_read, - .write = cpuset_common_file_write, + .write_string = cpuset_write_resmask, + .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_CPULIST, }, { .name = "mems", .read = cpuset_common_file_read, - .write = cpuset_common_file_write, + .write_string = cpuset_write_resmask, + .max_write_len = (100U + 6 * MAX_NUMNODES), .private = FILE_MEMLIST, }, @@ -1792,7 +1777,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to) scan.scan.heap = NULL; scan.to = to->css.cgroup; - if (cgroup_scan_tasks((struct cgroup_scanner *)&scan)) + if (cgroup_scan_tasks(&scan.scan)) printk(KERN_ERR "move_member_tasks_to_cpuset: " "cgroup_scan_tasks failed\n"); } @@ -1852,6 +1837,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root) struct cpuset *child; /* scans child cpusets of cp */ struct list_head queue; struct cgroup *cont; + nodemask_t oldmems; INIT_LIST_HEAD(&queue); @@ -1871,6 +1857,8 @@ static void scan_for_empty_cpusets(const struct cpuset *root) nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) continue; + oldmems = cp->mems_allowed; + /* Remove offline cpus and mems from this cpuset. */ mutex_lock(&callback_mutex); cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); @@ -1882,6 +1870,10 @@ static void scan_for_empty_cpusets(const struct cpuset *root) if (cpus_empty(cp->cpus_allowed) || nodes_empty(cp->mems_allowed)) remove_tasks_in_empty_cpuset(cp); + else { + update_tasks_cpumask(cp); + update_tasks_nodemask(cp, &oldmems); + } } } @@ -1974,7 +1966,6 @@ void __init cpuset_init_smp(void) } /** - * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 10e43fd8b721..b3179dad71be 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; tmp = d->swapin_delay_total + tsk->delays->swapin_delay; d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; + tmp = d->freepages_delay_total + tsk->delays->freepages_delay; + d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; + d->freepages_count += tsk->delays->freepages_count; spin_unlock_irqrestore(&tsk->delays->lock, flags); done: @@ -165,3 +168,16 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk) return ret; } +void __delayacct_freepages_start(void) +{ + delayacct_start(¤t->delays->freepages_start); +} + +void __delayacct_freepages_end(void) +{ + delayacct_end(¤t->delays->freepages_start, + ¤t->delays->freepages_end, + ¤t->delays->freepages_delay, + ¤t->delays->freepages_count); +} + diff --git a/kernel/exit.c b/kernel/exit.c index 93d2711b9381..ad933bb29ec7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -85,7 +85,6 @@ static void __exit_signal(struct task_struct *tsk) BUG_ON(!sig); BUG_ON(!atomic_read(&sig->count)); - rcu_read_lock(); sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); @@ -121,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk) sig->nivcsw += tsk->nivcsw; sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); +#ifdef CONFIG_TASK_XACCT + sig->rchar += tsk->rchar; + sig->wchar += tsk->wchar; + sig->syscr += tsk->syscr; + sig->syscw += tsk->syscw; +#endif /* CONFIG_TASK_XACCT */ +#ifdef CONFIG_TASK_IO_ACCOUNTING + sig->ioac.read_bytes += tsk->ioac.read_bytes; + sig->ioac.write_bytes += tsk->ioac.write_bytes; + sig->ioac.cancelled_write_bytes += + tsk->ioac.cancelled_write_bytes; +#endif /* CONFIG_TASK_IO_ACCOUNTING */ sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } @@ -136,7 +147,6 @@ static void __exit_signal(struct task_struct *tsk) tsk->signal = NULL; tsk->sighand = NULL; spin_unlock(&sighand->siglock); - rcu_read_unlock(); __cleanup_sighand(sighand); clear_tsk_thread_flag(tsk,TIF_SIGPENDING); @@ -432,7 +442,7 @@ void daemonize(const char *name, ...) * We don't want to have TIF_FREEZE set if the system-wide hibernation * or suspend transition begins right now. */ - current->flags |= PF_NOFREEZE; + current->flags |= (PF_NOFREEZE | PF_KTHREAD); if (current->nsproxy != &init_nsproxy) { get_nsproxy(&init_nsproxy); @@ -666,26 +676,40 @@ assign_new_owner: static void exit_mm(struct task_struct * tsk) { struct mm_struct *mm = tsk->mm; + struct core_state *core_state; mm_release(tsk, mm); if (!mm) return; /* * Serialize with any possible pending coredump. - * We must hold mmap_sem around checking core_waiters + * We must hold mmap_sem around checking core_state * and clearing tsk->mm. The core-inducing thread - * will increment core_waiters for each thread in the + * will increment ->nr_threads for each thread in the * group with ->mm != NULL. */ down_read(&mm->mmap_sem); - if (mm->core_waiters) { + core_state = mm->core_state; + if (core_state) { + struct core_thread self; up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - if (!--mm->core_waiters) - complete(mm->core_startup_done); - up_write(&mm->mmap_sem); - wait_for_completion(&mm->core_done); + self.task = tsk; + self.next = xchg(&core_state->dumper.next, &self); + /* + * Implies mb(), the result of xchg() must be visible + * to core_state->dumper. + */ + if (atomic_dec_and_test(&core_state->nr_threads)) + complete(&core_state->startup); + + for (;;) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!self.task) /* see coredump_finish() */ + break; + schedule(); + } + __set_task_state(tsk, TASK_RUNNING); down_read(&mm->mmap_sem); } atomic_inc(&mm->mm_count); @@ -1354,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options, psig->coublock += task_io_get_oublock(p) + sig->oublock + sig->coublock; +#ifdef CONFIG_TASK_XACCT + psig->rchar += p->rchar + sig->rchar; + psig->wchar += p->wchar + sig->wchar; + psig->syscr += p->syscr + sig->syscr; + psig->syscw += p->syscw + sig->syscw; +#endif /* CONFIG_TASK_XACCT */ +#ifdef CONFIG_TASK_IO_ACCOUNTING + psig->ioac.read_bytes += + p->ioac.read_bytes + sig->ioac.read_bytes; + psig->ioac.write_bytes += + p->ioac.write_bytes + sig->ioac.write_bytes; + psig->ioac.cancelled_write_bytes += + p->ioac.cancelled_write_bytes + + sig->ioac.cancelled_write_bytes; +#endif /* CONFIG_TASK_IO_ACCOUNTING */ spin_unlock_irq(&p->parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index 552c8d8e77ad..b99d73e971a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -93,6 +93,23 @@ int nr_processes(void) static struct kmem_cache *task_struct_cachep; #endif +#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR +static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) +{ +#ifdef CONFIG_DEBUG_STACK_USAGE + gfp_t mask = GFP_KERNEL | __GFP_ZERO; +#else + gfp_t mask = GFP_KERNEL; +#endif + return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); +} + +static inline void free_thread_info(struct thread_info *ti) +{ + free_pages((unsigned long)ti, THREAD_SIZE_ORDER); +} +#endif + /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; @@ -383,7 +400,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) INIT_LIST_HEAD(&mm->mmlist); mm->flags = (current->mm) ? current->mm->flags : MMF_DUMP_FILTER_DEFAULT; - mm->core_waiters = 0; + mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); set_mm_counter(mm, anon_rss, 0); @@ -457,7 +474,7 @@ EXPORT_SYMBOL_GPL(mmput); /** * get_task_mm - acquire a reference to the task's mm * - * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning + * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning * this kernel workthread has transiently adopted a user mm with use_mm, * to do its AIO) is not set and if so returns a reference to it, after * bumping up the use count. User must release the mm via mmput() @@ -470,7 +487,7 @@ struct mm_struct *get_task_mm(struct task_struct *task) task_lock(task); mm = task->mm; if (mm) { - if (task->flags & PF_BORROWED_MM) + if (task->flags & PF_KTHREAD) mm = NULL; else atomic_inc(&mm->mm_users); @@ -795,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; +#ifdef CONFIG_TASK_XACCT + sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0; +#endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + memset(&sig->ioac, 0, sizeof(sig->ioac)); +#endif sig->sum_sched_runtime = 0; INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); @@ -1090,6 +1113,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; + if (current->nsproxy != p->nsproxy) { + retval = ns_cgroup_clone(p, pid); + if (retval) + goto bad_fork_free_pid; + } + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 5bc6e5ecc493..f8914b92b664 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -260,9 +260,7 @@ int set_irq_wake(unsigned int irq, unsigned int on) } } else { if (desc->wake_depth == 0) { - printk(KERN_WARNING "Unbalanced IRQ %d " - "wake disable\n", irq); - WARN_ON(1); + WARN(1, "Unbalanced IRQ %d wake disable\n", irq); } else if (--desc->wake_depth == 0) { ret = set_irq_wake_real(irq, on); if (ret) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6fc0040f3e3a..38fc10ac7541 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -176,7 +176,7 @@ static unsigned long get_symbol_pos(unsigned long addr, high = kallsyms_num_syms; while (high - low > 1) { - mid = (low + high) / 2; + mid = low + (high - low) / 2; if (kallsyms_addresses[mid] <= addr) low = mid; else diff --git a/kernel/kmod.c b/kernel/kmod.c index 2989f67c4446..2456d1a0befb 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -352,16 +352,17 @@ static inline void register_pm_notifier_callback(void) {} * @path: path to usermode executable * @argv: arg vector for process * @envp: environment for process + * @gfp_mask: gfp mask for memory allocation * * Returns either %NULL on allocation failure, or a subprocess_info * structure. This should be passed to call_usermodehelper_exec to * exec the process and free the structure. */ -struct subprocess_info *call_usermodehelper_setup(char *path, - char **argv, char **envp) +struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, + char **envp, gfp_t gfp_mask) { struct subprocess_info *sub_info; - sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); + sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); if (!sub_info) goto out; @@ -494,7 +495,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, struct subprocess_info *sub_info; int ret; - sub_info = call_usermodehelper_setup(path, argv, envp); + sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL); if (sub_info == NULL) return -ENOMEM; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1485ca8d0e00..75bc2cd9ebc6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -62,6 +62,7 @@ addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name))) #endif +static int kprobes_initialized; static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; @@ -69,8 +70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; static bool kprobe_enabled; DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ -DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; +static struct { + spinlock_t lock ____cacheline_aligned; +} kretprobe_table_locks[KPROBE_TABLE_SIZE]; + +static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) +{ + return &(kretprobe_table_locks[hash].lock); +} /* * Normally, functions that we'd want to prohibit kprobes in, are marked @@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) return; } -/* Called with kretprobe_lock held */ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head) { + struct kretprobe *rp = ri->rp; + /* remove rp inst off the rprobe_inst_table */ hlist_del(&ri->hlist); - if (ri->rp) { - /* remove rp inst off the used list */ - hlist_del(&ri->uflist); - /* put rp inst back onto the free list */ - INIT_HLIST_NODE(&ri->uflist); - hlist_add_head(&ri->uflist, &ri->rp->free_instances); + INIT_HLIST_NODE(&ri->hlist); + if (likely(rp)) { + spin_lock(&rp->lock); + hlist_add_head(&ri->hlist, &rp->free_instances); + spin_unlock(&rp->lock); } else /* Unregistering */ hlist_add_head(&ri->hlist, head); } -struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) +void kretprobe_hash_lock(struct task_struct *tsk, + struct hlist_head **head, unsigned long *flags) { - return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); + spinlock_t *hlist_lock; + + *head = &kretprobe_inst_table[hash]; + hlist_lock = kretprobe_table_lock_ptr(hash); + spin_lock_irqsave(hlist_lock, *flags); +} + +void kretprobe_table_lock(unsigned long hash, unsigned long *flags) +{ + spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_lock_irqsave(hlist_lock, *flags); +} + +void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) +{ + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); + spinlock_t *hlist_lock; + + hlist_lock = kretprobe_table_lock_ptr(hash); + spin_unlock_irqrestore(hlist_lock, *flags); +} + +void kretprobe_table_unlock(unsigned long hash, unsigned long *flags) +{ + spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_unlock_irqrestore(hlist_lock, *flags); } /* @@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; struct hlist_node *node, *tmp; - unsigned long flags = 0; + unsigned long hash, flags = 0; - INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(tk); + if (unlikely(!kprobes_initialized)) + /* Early boot. kretprobe_table_locks not yet initialized. */ + return; + + hash = hash_ptr(tk, KPROBE_HASH_BITS); + head = &kretprobe_inst_table[hash]; + kretprobe_table_lock(hash, &flags); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri, &empty_rp); } - spin_unlock_irqrestore(&kretprobe_lock, flags); - + kretprobe_table_unlock(hash, &flags); + INIT_HLIST_HEAD(&empty_rp); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); @@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp) struct kretprobe_instance *ri; struct hlist_node *pos, *next; - hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) { - hlist_del(&ri->uflist); + hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { + hlist_del(&ri->hlist); kfree(ri); } } static void __kprobes cleanup_rp_inst(struct kretprobe *rp) { - unsigned long flags; + unsigned long flags, hash; struct kretprobe_instance *ri; struct hlist_node *pos, *next; + struct hlist_head *head; + /* No race here */ - spin_lock_irqsave(&kretprobe_lock, flags); - hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { - ri->rp = NULL; - hlist_del(&ri->uflist); + for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { + kretprobe_table_lock(hash, &flags); + head = &kretprobe_inst_table[hash]; + hlist_for_each_entry_safe(ri, pos, next, head, hlist) { + if (ri->rp == rp) + ri->rp = NULL; + } + kretprobe_table_unlock(hash, &flags); } - spin_unlock_irqrestore(&kretprobe_lock, flags); free_rp_inst(rp); } @@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); - unsigned long flags = 0; + unsigned long hash, flags = 0; + struct kretprobe_instance *ri; /*TODO: consider to only swap the RA after the last pre_handler fired */ - spin_lock_irqsave(&kretprobe_lock, flags); + hash = hash_ptr(current, KPROBE_HASH_BITS); + spin_lock_irqsave(&rp->lock, flags); if (!hlist_empty(&rp->free_instances)) { - struct kretprobe_instance *ri; - ri = hlist_entry(rp->free_instances.first, - struct kretprobe_instance, uflist); + struct kretprobe_instance, hlist); + hlist_del(&ri->hlist); + spin_unlock_irqrestore(&rp->lock, flags); + ri->rp = rp; ri->task = current; if (rp->entry_handler && rp->entry_handler(ri, regs)) { - spin_unlock_irqrestore(&kretprobe_lock, flags); + spin_unlock_irqrestore(&rp->lock, flags); return 0; } arch_prepare_kretprobe(ri, regs); /* XXX(hch): why is there no hlist_move_head? */ - hlist_del(&ri->uflist); - hlist_add_head(&ri->uflist, &ri->rp->used_instances); - hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task)); - } else + INIT_HLIST_NODE(&ri->hlist); + kretprobe_table_lock(hash, &flags); + hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]); + kretprobe_table_unlock(hash, &flags); + } else { rp->nmissed++; - spin_unlock_irqrestore(&kretprobe_lock, flags); + spin_unlock_irqrestore(&rp->lock, flags); + } return 0; } @@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, rp->maxactive = NR_CPUS; #endif } - INIT_HLIST_HEAD(&rp->used_instances); + spin_lock_init(&rp->lock); INIT_HLIST_HEAD(&rp->free_instances); for (i = 0; i < rp->maxactive; i++) { inst = kmalloc(sizeof(struct kretprobe_instance) + @@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, free_rp_inst(rp); return -ENOMEM; } - INIT_HLIST_NODE(&inst->uflist); - hlist_add_head(&inst->uflist, &rp->free_instances); + INIT_HLIST_NODE(&inst->hlist); + hlist_add_head(&inst->hlist, &rp->free_instances); } rp->nmissed = 0; @@ -1009,6 +1058,7 @@ static int __init init_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { INIT_HLIST_HEAD(&kprobe_table[i]); INIT_HLIST_HEAD(&kretprobe_inst_table[i]); + spin_lock_init(&(kretprobe_table_locks[i].lock)); } /* @@ -1050,6 +1100,7 @@ static int __init init_kprobes(void) err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); + kprobes_initialized = (err == 0); if (!err) init_test_probes(); @@ -1286,13 +1337,8 @@ EXPORT_SYMBOL_GPL(register_jprobe); EXPORT_SYMBOL_GPL(unregister_jprobe); EXPORT_SYMBOL_GPL(register_jprobes); EXPORT_SYMBOL_GPL(unregister_jprobes); -#ifdef CONFIG_KPROBES EXPORT_SYMBOL_GPL(jprobe_return); -#endif - -#ifdef CONFIG_KPROBES EXPORT_SYMBOL_GPL(register_kretprobe); EXPORT_SYMBOL_GPL(unregister_kretprobe); EXPORT_SYMBOL_GPL(register_kretprobes); EXPORT_SYMBOL_GPL(unregister_kretprobes); -#endif diff --git a/kernel/marker.c b/kernel/marker.c index 1abfb923b761..971da5317903 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -441,7 +441,7 @@ static int remove_marker(const char *name) hlist_del(&e->hlist); /* Make sure the call_rcu has been executed */ if (e->rcu_pending) - rcu_barrier(); + rcu_barrier_sched(); kfree(e); return 0; } @@ -476,7 +476,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format) hlist_del(&(*entry)->hlist); /* Make sure the call_rcu has been executed */ if ((*entry)->rcu_pending) - rcu_barrier(); + rcu_barrier_sched(); kfree(*entry); *entry = e; trace_mark(core_marker_format, "name %s format %s", @@ -655,7 +655,7 @@ int marker_probe_register(const char *name, const char *format, * make sure it's executed now. */ if (entry->rcu_pending) - rcu_barrier(); + rcu_barrier_sched(); old = marker_entry_add_probe(entry, probe, probe_private); if (IS_ERR(old)) { ret = PTR_ERR(old); @@ -670,10 +670,7 @@ int marker_probe_register(const char *name, const char *format, entry->rcu_pending = 1; /* write rcu_pending before calling the RCU callback */ smp_wmb(); -#ifdef CONFIG_PREEMPT_RCU - synchronize_sched(); /* Until we have the call_rcu_sched() */ -#endif - call_rcu(&entry->rcu, free_old_closure); + call_rcu_sched(&entry->rcu, free_old_closure); end: mutex_unlock(&markers_mutex); return ret; @@ -704,7 +701,7 @@ int marker_probe_unregister(const char *name, if (!entry) goto end; if (entry->rcu_pending) - rcu_barrier(); + rcu_barrier_sched(); old = marker_entry_remove_probe(entry, probe, probe_private); mutex_unlock(&markers_mutex); marker_update_probes(); /* may update entry */ @@ -716,10 +713,7 @@ int marker_probe_unregister(const char *name, entry->rcu_pending = 1; /* write rcu_pending before calling the RCU callback */ smp_wmb(); -#ifdef CONFIG_PREEMPT_RCU - synchronize_sched(); /* Until we have the call_rcu_sched() */ -#endif - call_rcu(&entry->rcu, free_old_closure); + call_rcu_sched(&entry->rcu, free_old_closure); remove_marker(name); /* Ignore busy error message */ ret = 0; end: @@ -786,7 +780,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe, goto end; } if (entry->rcu_pending) - rcu_barrier(); + rcu_barrier_sched(); old = marker_entry_remove_probe(entry, NULL, probe_private); mutex_unlock(&markers_mutex); marker_update_probes(); /* may update entry */ @@ -797,10 +791,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe, entry->rcu_pending = 1; /* write rcu_pending before calling the RCU callback */ smp_wmb(); -#ifdef CONFIG_PREEMPT_RCU - synchronize_sched(); /* Until we have the call_rcu_sched() */ -#endif - call_rcu(&entry->rcu, free_old_closure); + call_rcu_sched(&entry->rcu, free_old_closure); remove_marker(entry->name); /* Ignore busy error message */ end: mutex_unlock(&markers_mutex); diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 48d7ed6fc3a4..43c2111cd54d 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/cgroup.h> #include <linux/fs.h> +#include <linux/proc_fs.h> #include <linux/slab.h> #include <linux/nsproxy.h> @@ -24,9 +25,12 @@ static inline struct ns_cgroup *cgroup_to_ns( struct ns_cgroup, css); } -int ns_cgroup_clone(struct task_struct *task) +int ns_cgroup_clone(struct task_struct *task, struct pid *pid) { - return cgroup_clone(task, &ns_subsys); + char name[PROC_NUMBUF]; + + snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid)); + return cgroup_clone(task, &ns_subsys, name); } /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index adc785146a1c..21575fc46d05 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -157,12 +157,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) goto out; } - err = ns_cgroup_clone(tsk); - if (err) { - put_nsproxy(new_ns); - goto out; - } - tsk->nsproxy = new_ns; out: @@ -209,7 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, goto out; } - err = ns_cgroup_clone(current); + err = ns_cgroup_clone(current, task_pid(current)); if (err) put_nsproxy(*new_nsp); diff --git a/kernel/panic.c b/kernel/panic.c index 425567f45b9f..12c5a0a6c89b 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -318,6 +318,28 @@ void warn_on_slowpath(const char *file, int line) add_taint(TAINT_WARN); } EXPORT_SYMBOL(warn_on_slowpath); + + +void warn_slowpath(const char *file, int line, const char *fmt, ...) +{ + va_list args; + char function[KSYM_SYMBOL_LEN]; + unsigned long caller = (unsigned long)__builtin_return_address(0); + sprint_symbol(function, caller); + + printk(KERN_WARNING "------------[ cut here ]------------\n"); + printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, + line, function); + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + + print_modules(); + dump_stack(); + print_oops_end_marker(); + add_taint(TAINT_WARN); +} +EXPORT_SYMBOL(warn_slowpath); #endif #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/kernel/pid.c b/kernel/pid.c index 30bd5d4b2ac7..064e76afa507 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -309,12 +309,6 @@ struct pid *find_vpid(int nr) } EXPORT_SYMBOL_GPL(find_vpid); -struct pid *find_pid(int nr) -{ - return find_pid_ns(nr, &init_pid_ns); -} -EXPORT_SYMBOL_GPL(find_pid); - /* * attach_pid() must be called with the tasklist_lock write-held. */ @@ -435,6 +429,7 @@ struct pid *find_get_pid(pid_t nr) return pid; } +EXPORT_SYMBOL_GPL(find_get_pid); pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) { @@ -482,7 +477,7 @@ EXPORT_SYMBOL(task_session_nr_ns); /* * Used by proc to find the first pid that is greater then or equal to nr. * - * If there is a pid at nr this function is exactly the same as find_pid. + * If there is a pid at nr this function is exactly the same as find_pid_ns. */ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) { @@ -497,7 +492,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) return pid; } -EXPORT_SYMBOL_GPL(find_get_pid); /* * The pid hash table is scaled according to the amount of memory in the diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 98702b4b8851..ea567b78d1aa 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -12,6 +12,7 @@ #include <linux/pid_namespace.h> #include <linux/syscalls.h> #include <linux/err.h> +#include <linux/acct.h> #define BITS_PER_PAGE (PAGE_SIZE*8) @@ -71,7 +72,7 @@ static struct pid_namespace *create_pid_namespace(unsigned int level) struct pid_namespace *ns; int i; - ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); + ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); if (ns == NULL) goto out; @@ -84,17 +85,13 @@ static struct pid_namespace *create_pid_namespace(unsigned int level) goto out_free_map; kref_init(&ns->kref); - ns->last_pid = 0; - ns->child_reaper = NULL; ns->level = level; set_bit(0, ns->pidmap[0].page); atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); - for (i = 1; i < PIDMAP_ENTRIES; i++) { - ns->pidmap[i].page = NULL; + for (i = 1; i < PIDMAP_ENTRIES; i++) atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); - } return ns; @@ -185,6 +182,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) /* Child reaper for the pid namespace is going away */ pid_ns->child_reaper = NULL; + acct_exit_ns(pid_ns); return; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index dbd8398ddb0b..9a21681aa80f 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -449,9 +449,6 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) spin_unlock_irqrestore(&idr_lock, flags); } sigqueue_free(tmr->sigq); - if (unlikely(tmr->it_process) && - tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) - put_task_struct(tmr->it_process); kmem_cache_free(posix_timers_cache, tmr); } @@ -856,11 +853,10 @@ retry_delete: * This keeps any tasks waiting on the spin lock from thinking * they got something (see the lock code above). */ - if (timer->it_process) { - if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) - put_task_struct(timer->it_process); - timer->it_process = NULL; - } + if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) + put_task_struct(timer->it_process); + timer->it_process = NULL; + unlock_timer(timer, flags); release_posix_timer(timer, IT_ID_SET); return 0; @@ -885,11 +881,10 @@ retry_delete: * This keeps any tasks waiting on the spin lock from thinking * they got something (see the lock code above). */ - if (timer->it_process) { - if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) - put_task_struct(timer->it_process); - timer->it_process = NULL; - } + if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) + put_task_struct(timer->it_process); + timer->it_process = NULL; + unlock_timer(timer, flags); release_posix_timer(timer, IT_ID_SET); } diff --git a/kernel/printk.c b/kernel/printk.c index 3f7a2a94583b..a7f7559c5f6c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg) } #if defined CONFIG_PRINTK + +DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); /* * printk rate limiting, lifted from the networking subsystem. * @@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg) * every printk_ratelimit_jiffies to make a denial-of-service * attack impossible. */ -int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) -{ - return __ratelimit(ratelimit_jiffies, ratelimit_burst); -} -EXPORT_SYMBOL(__printk_ratelimit); - -/* minimum time in jiffies between messages */ -int printk_ratelimit_jiffies = 5 * HZ; - -/* number of messages we send before ratelimiting */ -int printk_ratelimit_burst = 10; - int printk_ratelimit(void) { - return __printk_ratelimit(printk_ratelimit_jiffies, - printk_ratelimit_burst); + return __ratelimit(&printk_ratelimit_state); } EXPORT_SYMBOL(printk_ratelimit); diff --git a/kernel/profile.c b/kernel/profile.c index 58926411eb2a..cd26bed4cc26 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -112,8 +112,6 @@ void __init profile_init(void) /* Profile event notifications */ -#ifdef CONFIG_PROFILING - static BLOCKING_NOTIFIER_HEAD(task_exit_notifier); static ATOMIC_NOTIFIER_HEAD(task_free_notifier); static BLOCKING_NOTIFIER_HEAD(munmap_notifier); @@ -203,8 +201,6 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *)) } EXPORT_SYMBOL_GPL(unregister_timer_hook); -#endif /* CONFIG_PROFILING */ - #ifdef CONFIG_SMP /* diff --git a/kernel/res_counter.c b/kernel/res_counter.c index d3c61b4ebef2..f275c8eca772 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/res_counter.h> #include <linux/uaccess.h> +#include <linux/mm.h> void res_counter_init(struct res_counter *counter) { @@ -102,44 +103,37 @@ u64 res_counter_read_u64(struct res_counter *counter, int member) return *res_counter_member(counter, member); } -ssize_t res_counter_write(struct res_counter *counter, int member, - const char __user *userbuf, size_t nbytes, loff_t *pos, - int (*write_strategy)(char *st_buf, unsigned long long *val)) +int res_counter_memparse_write_strategy(const char *buf, + unsigned long long *res) { - int ret; - char *buf, *end; - unsigned long flags; - unsigned long long tmp, *val; - - buf = kmalloc(nbytes + 1, GFP_KERNEL); - ret = -ENOMEM; - if (buf == NULL) - goto out; + char *end; + /* FIXME - make memparse() take const char* args */ + *res = memparse((char *)buf, &end); + if (*end != '\0') + return -EINVAL; - buf[nbytes] = '\0'; - ret = -EFAULT; - if (copy_from_user(buf, userbuf, nbytes)) - goto out_free; + *res = PAGE_ALIGN(*res); + return 0; +} - ret = -EINVAL; +int res_counter_write(struct res_counter *counter, int member, + const char *buf, write_strategy_fn write_strategy) +{ + char *end; + unsigned long flags; + unsigned long long tmp, *val; - strstrip(buf); if (write_strategy) { - if (write_strategy(buf, &tmp)) { - goto out_free; - } + if (write_strategy(buf, &tmp)) + return -EINVAL; } else { tmp = simple_strtoull(buf, &end, 10); if (*end != '\0') - goto out_free; + return -EINVAL; } spin_lock_irqsave(&counter->lock, flags); val = res_counter_member(counter, member); *val = tmp; spin_unlock_irqrestore(&counter->lock, flags); - ret = nbytes; -out_free: - kfree(buf); -out: - return ret; + return 0; } diff --git a/kernel/sched.c b/kernel/sched.c index 6acf749d3336..0047bd9b96aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4046,6 +4046,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); + /* Account for user time used */ + acct_update_integrals(p); } /* diff --git a/kernel/signal.c b/kernel/signal.c index 6c0958e52ea7..82c3545596c5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -338,13 +338,9 @@ unblock_all_signals(void) spin_unlock_irqrestore(¤t->sighand->siglock, flags); } -static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) +static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) { struct sigqueue *q, *first = NULL; - int still_pending = 0; - - if (unlikely(!sigismember(&list->signal, sig))) - return 0; /* * Collect the siginfo appropriate to this signal. Check if @@ -352,33 +348,30 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) */ list_for_each_entry(q, &list->list, list) { if (q->info.si_signo == sig) { - if (first) { - still_pending = 1; - break; - } + if (first) + goto still_pending; first = q; } } + + sigdelset(&list->signal, sig); + if (first) { +still_pending: list_del_init(&first->list); copy_siginfo(info, &first->info); __sigqueue_free(first); - if (!still_pending) - sigdelset(&list->signal, sig); } else { - /* Ok, it wasn't in the queue. This must be a fast-pathed signal or we must have been out of queue space. So zero out the info. */ - sigdelset(&list->signal, sig); info->si_signo = sig; info->si_errno = 0; info->si_code = 0; info->si_pid = 0; info->si_uid = 0; } - return 1; } static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, @@ -396,8 +389,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, } } - if (!collect_signal(sig, pending, info)) - sig = 0; + collect_signal(sig, pending, info); } return sig; @@ -462,8 +454,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * is to alert stop-signal processing code when another * processor has come along and cleared the flag. */ - if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) - tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; } if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { /* @@ -1125,7 +1116,7 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); * is probably wrong. Should make it like BSD or SYSV. */ -static int kill_something_info(int sig, struct siginfo *info, int pid) +static int kill_something_info(int sig, struct siginfo *info, pid_t pid) { int ret; @@ -1237,17 +1228,6 @@ int kill_pid(struct pid *pid, int sig, int priv) } EXPORT_SYMBOL(kill_pid); -int -kill_proc(pid_t pid, int sig, int priv) -{ - int ret; - - rcu_read_lock(); - ret = kill_pid_info(sig, __si_special(priv), find_pid(pid)); - rcu_read_unlock(); - return ret; -} - /* * These functions support sending signals using preallocated sigqueue * structures. This is needed "because realtime applications cannot @@ -1379,10 +1359,9 @@ void do_notify_parent(struct task_struct *tsk, int sig) info.si_uid = tsk->uid; - /* FIXME: find out whether or not this is supposed to be c*time. */ - info.si_utime = cputime_to_jiffies(cputime_add(tsk->utime, + info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, tsk->signal->utime)); - info.si_stime = cputime_to_jiffies(cputime_add(tsk->stime, + info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, tsk->signal->stime)); info.si_status = tsk->exit_code & 0x7f; @@ -1450,9 +1429,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) info.si_uid = tsk->uid; - /* FIXME: find out whether or not this is supposed to be c*time. */ - info.si_utime = cputime_to_jiffies(tsk->utime); - info.si_stime = cputime_to_jiffies(tsk->stime); + info.si_utime = cputime_to_clock_t(tsk->utime); + info.si_stime = cputime_to_clock_t(tsk->stime); info.si_code = why; switch (why) { @@ -1491,10 +1469,10 @@ static inline int may_ptrace_stop(void) * is a deadlock situation, and pointless because our tracer * is dead so don't allow us to stop. * If SIGKILL was already sent before the caller unlocked - * ->siglock we must see ->core_waiters != 0. Otherwise it + * ->siglock we must see ->core_state != NULL. Otherwise it * is safe to enter schedule(). */ - if (unlikely(current->mm->core_waiters) && + if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) return 0; @@ -1507,9 +1485,8 @@ static inline int may_ptrace_stop(void) */ static int sigkill_pending(struct task_struct *tsk) { - return ((sigismember(&tsk->pending.signal, SIGKILL) || - sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) && - !unlikely(sigismember(&tsk->blocked, SIGKILL))); + return sigismember(&tsk->pending.signal, SIGKILL) || + sigismember(&tsk->signal->shared_pending.signal, SIGKILL); } /* @@ -1525,8 +1502,6 @@ static int sigkill_pending(struct task_struct *tsk) */ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) { - int killed = 0; - if (arch_ptrace_stop_needed(exit_code, info)) { /* * The arch code has something special to do before a @@ -1542,7 +1517,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) spin_unlock_irq(¤t->sighand->siglock); arch_ptrace_stop(exit_code, info); spin_lock_irq(¤t->sighand->siglock); - killed = sigkill_pending(current); + if (sigkill_pending(current)) + return; } /* @@ -1559,7 +1535,7 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) __set_current_state(TASK_TRACED); spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); - if (!unlikely(killed) && may_ptrace_stop()) { + if (may_ptrace_stop()) { do_notify_parent_cldstop(current, CLD_TRAPPED); read_unlock(&tasklist_lock); schedule(); @@ -1658,8 +1634,7 @@ static int do_signal_stop(int signr) } else { struct task_struct *t; - if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE)) - != SIGNAL_STOP_DEQUEUED) || + if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; /* @@ -1920,7 +1895,6 @@ EXPORT_SYMBOL(recalc_sigpending); EXPORT_SYMBOL_GPL(dequeue_signal); EXPORT_SYMBOL(flush_signals); EXPORT_SYMBOL(force_sig); -EXPORT_SYMBOL(kill_proc); EXPORT_SYMBOL(ptrace_notify); EXPORT_SYMBOL(send_sig); EXPORT_SYMBOL(send_sig_info); @@ -2196,7 +2170,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese, } asmlinkage long -sys_kill(int pid, int sig) +sys_kill(pid_t pid, int sig) { struct siginfo info; @@ -2209,7 +2183,7 @@ sys_kill(int pid, int sig) return kill_something_info(sig, &info, pid); } -static int do_tkill(int tgid, int pid, int sig) +static int do_tkill(pid_t tgid, pid_t pid, int sig) { int error; struct siginfo info; @@ -2255,7 +2229,7 @@ static int do_tkill(int tgid, int pid, int sig) * exists but it's not belonging to the target process anymore. This * method solves the problem of threads exiting and PIDs getting reused. */ -asmlinkage long sys_tgkill(int tgid, int pid, int sig) +asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) @@ -2268,7 +2242,7 @@ asmlinkage long sys_tgkill(int tgid, int pid, int sig) * Send a signal to only one task, even if it's a CLONE_THREAD task. */ asmlinkage long -sys_tkill(int pid, int sig) +sys_tkill(pid_t pid, int sig) { /* This is only valid for single tasks */ if (pid <= 0) @@ -2278,7 +2252,7 @@ sys_tkill(int pid, int sig) } asmlinkage long -sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) +sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo) { siginfo_t info; diff --git a/kernel/sys.c b/kernel/sys.c index 14e97282eb6c..0c9d3fa1f5ff 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1343,8 +1343,6 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -EXPORT_SYMBOL(uts_sem); - asmlinkage long sys_newuname(struct new_utsname __user * name) { int errno = 0; @@ -1795,7 +1793,7 @@ int orderly_poweroff(bool force) goto out; } - info = call_usermodehelper_setup(argv[0], argv, envp); + info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC); if (info == NULL) { argv_free(argv); goto out; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index bd66ac5406f3..08d6e1bb99ac 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -57,6 +57,7 @@ cond_syscall(compat_sys_set_robust_list); cond_syscall(sys_get_robust_list); cond_syscall(compat_sys_get_robust_list); cond_syscall(sys_epoll_create); +cond_syscall(sys_epoll_create1); cond_syscall(sys_epoll_ctl); cond_syscall(sys_epoll_wait); cond_syscall(sys_epoll_pwait); @@ -159,6 +160,7 @@ cond_syscall(sys_ioprio_get); cond_syscall(sys_signalfd); cond_syscall(sys_signalfd4); cond_syscall(compat_sys_signalfd); +cond_syscall(compat_sys_signalfd4); cond_syscall(sys_timerfd_create); cond_syscall(sys_timerfd_settime); cond_syscall(sys_timerfd_gettime); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a8299d1fe59..35a50db9b6ce 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT, .procname = "printk_ratelimit", - .data = &printk_ratelimit_jiffies, + .data = &printk_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT_BURST, .procname = "printk_ratelimit_burst", - .data = &printk_ratelimit_burst, + .data = &printk_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index c09350d564f2..c35da23ab8fb 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -1532,6 +1532,8 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) sysctl_check_leaf(namespaces, table, &fail); } sysctl_check_bin_path(table, &fail); + if (table->mode > 0777) + set_fail(&fail, table, "bogus .mode"); if (fail) { set_fail(&fail, table, NULL); error = -EINVAL; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 06b17547f4e7..bd6be76303cf 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -35,7 +35,7 @@ */ #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) -static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; +static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 63528086337c..ce2d723c10e1 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -161,7 +161,7 @@ static void timer_notify(struct pt_regs *regs, int cpu) __trace_special(tr, data, 2, regs->ip, 0); while (i < sample_max_depth) { - frame.next_fp = 0; + frame.next_fp = NULL; frame.return_address = 0; if (!copy_stack_frame(fp, &frame)) break; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 4ab1b584961b..3da47ccdc5e5 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -28,14 +28,14 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) { struct timespec uptime, ts; - s64 ac_etime; + u64 ac_etime; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); /* calculate task elapsed time in timespec */ do_posix_clock_monotonic_gettime(&uptime); ts = timespec_sub(uptime, tsk->start_time); - /* rebase elapsed time to usec */ + /* rebase elapsed time to usec (should never be negative) */ ac_etime = timespec_to_ns(&ts); do_div(ac_etime, NSEC_PER_USEC); stats->ac_etime = ac_etime; @@ -84,9 +84,9 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) { struct mm_struct *mm; - /* convert pages-jiffies to Mbyte-usec */ - stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; - stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; + /* convert pages-usec to Mbyte-usec */ + stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB; + stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB; mm = get_task_mm(p); if (mm) { /* adjust to KB unit */ @@ -118,12 +118,19 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) void acct_update_integrals(struct task_struct *tsk) { if (likely(tsk->mm)) { - long delta = cputime_to_jiffies( - cputime_sub(tsk->stime, tsk->acct_stimexpd)); + cputime_t time, dtime; + struct timeval value; + u64 delta; + + time = tsk->stime + tsk->utime; + dtime = cputime_sub(time, tsk->acct_timexpd); + jiffies_to_timeval(cputime_to_jiffies(dtime), &value); + delta = value.tv_sec; + delta = delta * USEC_PER_SEC + value.tv_usec; if (delta == 0) return; - tsk->acct_stimexpd = tsk->stime; + tsk->acct_timexpd = time; tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; } @@ -135,7 +142,7 @@ void acct_update_integrals(struct task_struct *tsk) */ void acct_clear_integrals(struct task_struct *tsk) { - tsk->acct_stimexpd = 0; + tsk->acct_timexpd = 0; tsk->acct_rss_mem1 = 0; tsk->acct_vm_mem1 = 0; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6fd158b21026..ec7e4f62aaff 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -125,7 +125,7 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) } static void insert_work(struct cpu_workqueue_struct *cwq, - struct work_struct *work, int tail) + struct work_struct *work, struct list_head *head) { set_wq_data(work, cwq); /* @@ -133,10 +133,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq, * result of list_add() below, see try_to_grab_pending(). */ smp_wmb(); - if (tail) - list_add_tail(&work->entry, &cwq->worklist); - else - list_add(&work->entry, &cwq->worklist); + list_add_tail(&work->entry, head); wake_up(&cwq->more_work); } @@ -146,7 +143,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, unsigned long flags; spin_lock_irqsave(&cwq->lock, flags); - insert_work(cwq, work, 1); + insert_work(cwq, work, &cwq->worklist); spin_unlock_irqrestore(&cwq->lock, flags); } @@ -162,14 +159,11 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, */ int queue_work(struct workqueue_struct *wq, struct work_struct *work) { - int ret = 0; + int ret; + + ret = queue_work_on(get_cpu(), wq, work); + put_cpu(); - if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { - BUG_ON(!list_empty(&work->entry)); - __queue_work(wq_per_cpu(wq, get_cpu()), work); - put_cpu(); - ret = 1; - } return ret; } EXPORT_SYMBOL_GPL(queue_work); @@ -361,14 +355,14 @@ static void wq_barrier_func(struct work_struct *work) } static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, - struct wq_barrier *barr, int tail) + struct wq_barrier *barr, struct list_head *head) { INIT_WORK(&barr->work, wq_barrier_func); __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); init_completion(&barr->done); - insert_work(cwq, &barr->work, tail); + insert_work(cwq, &barr->work, head); } static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) @@ -388,7 +382,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) active = 0; spin_lock_irq(&cwq->lock); if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) { - insert_wq_barrier(cwq, &barr, 1); + insert_wq_barrier(cwq, &barr, &cwq->worklist); active = 1; } spin_unlock_irq(&cwq->lock); @@ -426,6 +420,57 @@ void flush_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(flush_workqueue); +/** + * flush_work - block until a work_struct's callback has terminated + * @work: the work which is to be flushed + * + * Returns false if @work has already terminated. + * + * It is expected that, prior to calling flush_work(), the caller has + * arranged for the work to not be requeued, otherwise it doesn't make + * sense to use this function. + */ +int flush_work(struct work_struct *work) +{ + struct cpu_workqueue_struct *cwq; + struct list_head *prev; + struct wq_barrier barr; + + might_sleep(); + cwq = get_wq_data(work); + if (!cwq) + return 0; + + lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); + + prev = NULL; + spin_lock_irq(&cwq->lock); + if (!list_empty(&work->entry)) { + /* + * See the comment near try_to_grab_pending()->smp_rmb(). + * If it was re-queued under us we are not going to wait. + */ + smp_rmb(); + if (unlikely(cwq != get_wq_data(work))) + goto out; + prev = &work->entry; + } else { + if (cwq->current_work != work) + goto out; + prev = &cwq->worklist; + } + insert_wq_barrier(cwq, &barr, prev->next); +out: + spin_unlock_irq(&cwq->lock); + if (!prev) + return 0; + + wait_for_completion(&barr.done); + return 1; +} +EXPORT_SYMBOL_GPL(flush_work); + /* * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, * so this work can't be re-armed in any way. @@ -473,7 +518,7 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, spin_lock_irq(&cwq->lock); if (unlikely(cwq->current_work == work)) { - insert_wq_barrier(cwq, &barr, 0); + insert_wq_barrier(cwq, &barr, cwq->worklist.next); running = 1; } spin_unlock_irq(&cwq->lock); @@ -644,10 +689,10 @@ int schedule_on_each_cpu(work_func_t func) struct work_struct *work = per_cpu_ptr(works, cpu); INIT_WORK(work, func); - set_bit(WORK_STRUCT_PENDING, work_data_bits(work)); - __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work); + schedule_work_on(cpu, work); } - flush_workqueue(keventd_wq); + for_each_online_cpu(cpu) + flush_work(per_cpu_ptr(works, cpu)); put_online_cpus(); free_percpu(works); return 0; @@ -784,7 +829,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, err = create_workqueue_thread(cwq, singlethread_cpu); start_workqueue_thread(cwq, -1); } else { - get_online_cpus(); + cpu_maps_update_begin(); spin_lock(&workqueue_lock); list_add(&wq->list, &workqueues); spin_unlock(&workqueue_lock); @@ -796,7 +841,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, err = create_workqueue_thread(cwq, cpu); start_workqueue_thread(cwq, cpu); } - put_online_cpus(); + cpu_maps_update_done(); } if (err) { @@ -810,8 +855,8 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key); static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) { /* - * Our caller is either destroy_workqueue() or CPU_DEAD, - * get_online_cpus() protects cwq->thread. + * Our caller is either destroy_workqueue() or CPU_POST_DEAD, + * cpu_add_remove_lock protects cwq->thread. */ if (cwq->thread == NULL) return; @@ -821,7 +866,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) flush_cpu_workqueue(cwq); /* - * If the caller is CPU_DEAD and cwq->worklist was not empty, + * If the caller is CPU_POST_DEAD and cwq->worklist was not empty, * a concurrent flush_workqueue() can insert a barrier after us. * However, in that case run_workqueue() won't return and check * kthread_should_stop() until it flushes all work_struct's. @@ -845,14 +890,14 @@ void destroy_workqueue(struct workqueue_struct *wq) const cpumask_t *cpu_map = wq_cpu_map(wq); int cpu; - get_online_cpus(); + cpu_maps_update_begin(); spin_lock(&workqueue_lock); list_del(&wq->list); spin_unlock(&workqueue_lock); for_each_cpu_mask_nr(cpu, *cpu_map) cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); - put_online_cpus(); + cpu_maps_update_done(); free_percpu(wq->cpu_wq); kfree(wq); @@ -866,6 +911,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, unsigned int cpu = (unsigned long)hcpu; struct cpu_workqueue_struct *cwq; struct workqueue_struct *wq; + int ret = NOTIFY_OK; action &= ~CPU_TASKS_FROZEN; @@ -873,7 +919,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: cpu_set(cpu, cpu_populated_map); } - +undo: list_for_each_entry(wq, &workqueues, list) { cwq = per_cpu_ptr(wq->cpu_wq, cpu); @@ -883,7 +929,9 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, break; printk(KERN_ERR "workqueue [%s] for %i failed\n", wq->name, cpu); - return NOTIFY_BAD; + action = CPU_UP_CANCELED; + ret = NOTIFY_BAD; + goto undo; case CPU_ONLINE: start_workqueue_thread(cwq, cpu); @@ -891,7 +939,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, case CPU_UP_CANCELED: start_workqueue_thread(cwq, -1); - case CPU_DEAD: + case CPU_POST_DEAD: cleanup_workqueue_thread(cwq); break; } @@ -899,11 +947,11 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_CANCELED: - case CPU_DEAD: + case CPU_POST_DEAD: cpu_clear(cpu, cpu_populated_map); } - return NOTIFY_OK; + return ret; } void __init init_workqueues(void) diff --git a/lib/cmdline.c b/lib/cmdline.c index f596c08d213a..5ba8a942a478 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -116,7 +116,7 @@ char *get_options(const char *str, int nints, int *ints) /** * memparse - parse a string with mem suffixes into a number * @ptr: Where parse begins - * @retptr: (output) Pointer to next char after parse completes + * @retptr: (output) Optional pointer to next char after parse completes * * Parses a string into a number. The number stored at @ptr is * potentially suffixed with %K (for kilobytes, or 1024 bytes), @@ -126,11 +126,13 @@ char *get_options(const char *str, int nints, int *ints) * megabyte, or one gigabyte, respectively. */ -unsigned long long memparse (char *ptr, char **retptr) +unsigned long long memparse(char *ptr, char **retptr) { - unsigned long long ret = simple_strtoull (ptr, retptr, 0); + char *endptr; /* local pointer to end of parsed string */ - switch (**retptr) { + unsigned long long ret = simple_strtoull(ptr, &endptr, 0); + + switch (*endptr) { case 'G': case 'g': ret <<= 10; @@ -140,10 +142,14 @@ unsigned long long memparse (char *ptr, char **retptr) case 'K': case 'k': ret <<= 10; - (*retptr)++; + endptr++; default: break; } + + if (retptr) + *retptr = endptr; + return ret; } diff --git a/lib/idr.c b/lib/idr.c index 7a02e173f027..3476f8203e97 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -6,6 +6,8 @@ * Modified by George Anzinger to reuse immediately and to use * find bit instructions. Also removed _irq on spinlocks. * + * Modified by Nadia Derbey to make it RCU safe. + * * Small id to pointer translation service. * * It uses a radix tree like structure as a sparse array indexed @@ -35,7 +37,7 @@ static struct kmem_cache *idr_layer_cache; -static struct idr_layer *alloc_layer(struct idr *idp) +static struct idr_layer *get_from_free_list(struct idr *idp) { struct idr_layer *p; unsigned long flags; @@ -50,15 +52,28 @@ static struct idr_layer *alloc_layer(struct idr *idp) return(p); } +static void idr_layer_rcu_free(struct rcu_head *head) +{ + struct idr_layer *layer; + + layer = container_of(head, struct idr_layer, rcu_head); + kmem_cache_free(idr_layer_cache, layer); +} + +static inline void free_layer(struct idr_layer *p) +{ + call_rcu(&p->rcu_head, idr_layer_rcu_free); +} + /* only called when idp->lock is held */ -static void __free_layer(struct idr *idp, struct idr_layer *p) +static void __move_to_free_list(struct idr *idp, struct idr_layer *p) { p->ary[0] = idp->id_free; idp->id_free = p; idp->id_free_cnt++; } -static void free_layer(struct idr *idp, struct idr_layer *p) +static void move_to_free_list(struct idr *idp, struct idr_layer *p) { unsigned long flags; @@ -66,7 +81,7 @@ static void free_layer(struct idr *idp, struct idr_layer *p) * Depends on the return element being zeroed. */ spin_lock_irqsave(&idp->lock, flags); - __free_layer(idp, p); + __move_to_free_list(idp, p); spin_unlock_irqrestore(&idp->lock, flags); } @@ -96,7 +111,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) * @gfp_mask: memory allocation flags * * This function should be called prior to locking and calling the - * following function. It preallocates enough memory to satisfy + * idr_get_new* functions. It preallocates enough memory to satisfy * the worst possible allocation. * * If the system is REALLY out of memory this function returns 0, @@ -109,7 +124,7 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) new = kmem_cache_alloc(idr_layer_cache, gfp_mask); if (new == NULL) return (0); - free_layer(idp, new); + move_to_free_list(idp, new); } return 1; } @@ -143,7 +158,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) /* if already at the top layer, we need to grow */ if (!(p = pa[l])) { *starting_id = id; - return -2; + return IDR_NEED_TO_GROW; } /* If we need to go up one layer, continue the @@ -160,16 +175,17 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = ((id >> sh) ^ n ^ m) << sh; } if ((id >= MAX_ID_BIT) || (id < 0)) - return -3; + return IDR_NOMORE_SPACE; if (l == 0) break; /* * Create the layer below if it is missing. */ if (!p->ary[m]) { - if (!(new = alloc_layer(idp))) + new = get_from_free_list(idp); + if (!new) return -1; - p->ary[m] = new; + rcu_assign_pointer(p->ary[m], new); p->count++; } pa[l--] = p; @@ -192,7 +208,7 @@ build_up: p = idp->top; layers = idp->layers; if (unlikely(!p)) { - if (!(p = alloc_layer(idp))) + if (!(p = get_from_free_list(idp))) return -1; layers = 1; } @@ -204,7 +220,7 @@ build_up: layers++; if (!p->count) continue; - if (!(new = alloc_layer(idp))) { + if (!(new = get_from_free_list(idp))) { /* * The allocation failed. If we built part of * the structure tear it down. @@ -214,7 +230,7 @@ build_up: p = p->ary[0]; new->ary[0] = NULL; new->bitmap = new->count = 0; - __free_layer(idp, new); + __move_to_free_list(idp, new); } spin_unlock_irqrestore(&idp->lock, flags); return -1; @@ -225,10 +241,10 @@ build_up: __set_bit(0, &new->bitmap); p = new; } - idp->top = p; + rcu_assign_pointer(idp->top, p); idp->layers = layers; v = sub_alloc(idp, &id, pa); - if (v == -2) + if (v == IDR_NEED_TO_GROW) goto build_up; return(v); } @@ -244,7 +260,8 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) * Successfully found an empty slot. Install the user * pointer and mark the slot full. */ - pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr; + rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], + (struct idr_layer *)ptr); pa[0]->count++; idr_mark_full(pa, id); } @@ -277,12 +294,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) * This is a cheap hack until the IDR code can be fixed to * return proper error values. */ - if (rv < 0) { - if (rv == -1) - return -EAGAIN; - else /* Will be -3 */ - return -ENOSPC; - } + if (rv < 0) + return _idr_rc_to_errno(rv); *id = rv; return 0; } @@ -312,12 +325,8 @@ int idr_get_new(struct idr *idp, void *ptr, int *id) * This is a cheap hack until the IDR code can be fixed to * return proper error values. */ - if (rv < 0) { - if (rv == -1) - return -EAGAIN; - else /* Will be -3 */ - return -ENOSPC; - } + if (rv < 0) + return _idr_rc_to_errno(rv); *id = rv; return 0; } @@ -325,7 +334,8 @@ EXPORT_SYMBOL(idr_get_new); static void idr_remove_warning(int id) { - printk("idr_remove called for id=%d which is not allocated.\n", id); + printk(KERN_WARNING + "idr_remove called for id=%d which is not allocated.\n", id); dump_stack(); } @@ -334,6 +344,7 @@ static void sub_remove(struct idr *idp, int shift, int id) struct idr_layer *p = idp->top; struct idr_layer **pa[MAX_LEVEL]; struct idr_layer ***paa = &pa[0]; + struct idr_layer *to_free; int n; *paa = NULL; @@ -349,13 +360,18 @@ static void sub_remove(struct idr *idp, int shift, int id) n = id & IDR_MASK; if (likely(p != NULL && test_bit(n, &p->bitmap))){ __clear_bit(n, &p->bitmap); - p->ary[n] = NULL; + rcu_assign_pointer(p->ary[n], NULL); + to_free = NULL; while(*paa && ! --((**paa)->count)){ - free_layer(idp, **paa); + if (to_free) + free_layer(to_free); + to_free = **paa; **paa-- = NULL; } if (!*paa) idp->layers = 0; + if (to_free) + free_layer(to_free); } else idr_remove_warning(id); } @@ -368,22 +384,34 @@ static void sub_remove(struct idr *idp, int shift, int id) void idr_remove(struct idr *idp, int id) { struct idr_layer *p; + struct idr_layer *to_free; /* Mask off upper bits we don't use for the search. */ id &= MAX_ID_MASK; sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); if (idp->top && idp->top->count == 1 && (idp->layers > 1) && - idp->top->ary[0]) { // We can drop a layer - + idp->top->ary[0]) { + /* + * Single child at leftmost slot: we can shrink the tree. + * This level is not needed anymore since when layers are + * inserted, they are inserted at the top of the existing + * tree. + */ + to_free = idp->top; p = idp->top->ary[0]; - idp->top->bitmap = idp->top->count = 0; - free_layer(idp, idp->top); - idp->top = p; + rcu_assign_pointer(idp->top, p); --idp->layers; + to_free->bitmap = to_free->count = 0; + free_layer(to_free); } while (idp->id_free_cnt >= IDR_FREE_MAX) { - p = alloc_layer(idp); + p = get_from_free_list(idp); + /* + * Note: we don't call the rcu callback here, since the only + * layers that fall into the freelist are those that have been + * preallocated. + */ kmem_cache_free(idr_layer_cache, p); } return; @@ -424,15 +452,13 @@ void idr_remove_all(struct idr *idp) id += 1 << n; while (n < fls(id)) { - if (p) { - memset(p, 0, sizeof *p); - free_layer(idp, p); - } + if (p) + free_layer(p); n += IDR_BITS; p = *--paa; } } - idp->top = NULL; + rcu_assign_pointer(idp->top, NULL); idp->layers = 0; } EXPORT_SYMBOL(idr_remove_all); @@ -444,7 +470,7 @@ EXPORT_SYMBOL(idr_remove_all); void idr_destroy(struct idr *idp) { while (idp->id_free_cnt) { - struct idr_layer *p = alloc_layer(idp); + struct idr_layer *p = get_from_free_list(idp); kmem_cache_free(idr_layer_cache, p); } } @@ -459,7 +485,8 @@ EXPORT_SYMBOL(idr_destroy); * return indicates that @id is not valid or you passed %NULL in * idr_get_new(). * - * The caller must serialize idr_find() vs idr_get_new() and idr_remove(). + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. */ void *idr_find(struct idr *idp, int id) { @@ -467,7 +494,7 @@ void *idr_find(struct idr *idp, int id) struct idr_layer *p; n = idp->layers * IDR_BITS; - p = idp->top; + p = rcu_dereference(idp->top); /* Mask off upper bits we don't use for the search. */ id &= MAX_ID_MASK; @@ -477,7 +504,7 @@ void *idr_find(struct idr *idp, int id) while (n > 0 && p) { n -= IDR_BITS; - p = p->ary[(id >> n) & IDR_MASK]; + p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); } return((void *)p); } @@ -510,7 +537,7 @@ int idr_for_each(struct idr *idp, struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; - p = idp->top; + p = rcu_dereference(idp->top); max = 1 << n; id = 0; @@ -518,7 +545,7 @@ int idr_for_each(struct idr *idp, while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; - p = p->ary[(id >> n) & IDR_MASK]; + p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); } if (p) { @@ -548,7 +575,7 @@ EXPORT_SYMBOL(idr_for_each); * A -ENOENT return indicates that @id was not found. * A -EINVAL return indicates that @id was not within valid constraints. * - * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove(). + * The caller must serialize with writers. */ void *idr_replace(struct idr *idp, void *ptr, int id) { @@ -574,7 +601,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) return ERR_PTR(-ENOENT); old_p = p->ary[n]; - p->ary[n] = ptr; + rcu_assign_pointer(p->ary[n], ptr); return old_p; } @@ -694,12 +721,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) restart: /* get vacant slot */ t = idr_get_empty_slot(&ida->idr, idr_id, pa); - if (t < 0) { - if (t == -1) - return -EAGAIN; - else /* will be -3 */ - return -ENOSPC; - } + if (t < 0) + return _idr_rc_to_errno(t); if (t * IDA_BITMAP_BITS >= MAX_ID_BIT) return -ENOSPC; @@ -720,7 +743,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) return -EAGAIN; memset(bitmap, 0, sizeof(struct ida_bitmap)); - pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap; + rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK], + (void *)bitmap); pa[0]->count++; } @@ -749,7 +773,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) * allocation. */ if (ida->idr.id_free_cnt || ida->free_bitmap) { - struct idr_layer *p = alloc_layer(&ida->idr); + struct idr_layer *p = get_from_free_list(&ida->idr); if (p) kmem_cache_free(idr_layer_cache, p); } diff --git a/lib/inflate.c b/lib/inflate.c index 9762294be062..1a8e8a978128 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -230,6 +230,45 @@ STATIC const ush mask_bits[] = { #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}} #define DUMPBITS(n) {b>>=(n);k-=(n);} +#ifndef NO_INFLATE_MALLOC +/* A trivial malloc implementation, adapted from + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + */ + +static unsigned long malloc_ptr; +static int malloc_count; + +static void *malloc(int size) +{ + void *p; + + if (size < 0) + error("Malloc error"); + if (!malloc_ptr) + malloc_ptr = free_mem_ptr; + + malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + + p = (void *)malloc_ptr; + malloc_ptr += size; + + if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) + error("Out of memory"); + + malloc_count++; + return p; +} + +static void free(void *where) +{ + malloc_count--; + if (!malloc_count) + malloc_ptr = free_mem_ptr; +} +#else +#define malloc(a) kmalloc(a, GFP_KERNEL) +#define free(a) kfree(a) +#endif /* Huffman code decoding is performed using a multi-level table lookup. @@ -1045,7 +1084,6 @@ STATIC int INIT inflate(void) int e; /* last block flag */ int r; /* result code */ unsigned h; /* maximum struct huft's malloc'ed */ - void *ptr; /* initialize window, bit buffer */ wp = 0; @@ -1057,12 +1095,12 @@ STATIC int INIT inflate(void) h = 0; do { hufts = 0; - gzip_mark(&ptr); - if ((r = inflate_block(&e)) != 0) { - gzip_release(&ptr); - return r; - } - gzip_release(&ptr); +#ifdef ARCH_HAS_DECOMP_WDOG + arch_decomp_wdog(); +#endif + r = inflate_block(&e); + if (r) + return r; if (hufts > h) h = hufts; } while (!e); diff --git a/lib/kobject.c b/lib/kobject.c index 744401571ed7..bd732ffebc85 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -164,9 +164,8 @@ static int kobject_add_internal(struct kobject *kobj) return -ENOENT; if (!kobj->name || !kobj->name[0]) { - pr_debug("kobject: (%p): attempted to be registered with empty " + WARN(1, "kobject: (%p): attempted to be registered with empty " "name!\n", kobj); - WARN_ON(1); return -EINVAL; } @@ -583,12 +582,10 @@ static void kobject_release(struct kref *kref) void kobject_put(struct kobject *kobj) { if (kobj) { - if (!kobj->state_initialized) { - printk(KERN_WARNING "kobject: '%s' (%p): is not " + if (!kobj->state_initialized) + WARN(1, KERN_WARNING "kobject: '%s' (%p): is not " "initialized, yet kobject_put() is being " "called.\n", kobject_name(kobj), kobj); - WARN_ON(1); - } kref_put(&kobj->kref, kobject_release); } } diff --git a/lib/list_debug.c b/lib/list_debug.c index 4350ba9655bd..1a39f4e3ae1f 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -20,18 +20,14 @@ void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { - if (unlikely(next->prev != prev)) { - printk(KERN_ERR "list_add corruption. next->prev should be " - "prev (%p), but was %p. (next=%p).\n", - prev, next->prev, next); - BUG(); - } - if (unlikely(prev->next != next)) { - printk(KERN_ERR "list_add corruption. prev->next should be " - "next (%p), but was %p. (prev=%p).\n", - next, prev->next, prev); - BUG(); - } + WARN(next->prev != prev, + "list_add corruption. next->prev should be " + "prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); + WARN(prev->next != next, + "list_add corruption. prev->next should be " + "next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); next->prev = new; new->next = next; new->prev = prev; @@ -40,20 +36,6 @@ void __list_add(struct list_head *new, EXPORT_SYMBOL(__list_add); /** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} -EXPORT_SYMBOL(list_add); - -/** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty on entry does not return true after this, the entry is @@ -61,16 +43,12 @@ EXPORT_SYMBOL(list_add); */ void list_del(struct list_head *entry) { - if (unlikely(entry->prev->next != entry)) { - printk(KERN_ERR "list_del corruption. prev->next should be %p, " - "but was %p\n", entry, entry->prev->next); - BUG(); - } - if (unlikely(entry->next->prev != entry)) { - printk(KERN_ERR "list_del corruption. next->prev should be %p, " - "but was %p\n", entry, entry->next->prev); - BUG(); - } + WARN(entry->prev->next != entry, + "list_del corruption. prev->next should be %p, " + "but was %p\n", entry, entry->prev->next); + WARN(entry->next->prev != entry, + "list_del corruption. next->prev should be %p, " + "but was %p\n", entry, entry->next->prev); __list_del(entry->prev, entry->next); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c index 77f0f9b775a9..5dc6b29c1575 100644 --- a/lib/lzo/lzo1x_decompress.c +++ b/lib/lzo/lzo1x_decompress.c @@ -138,8 +138,7 @@ match: t += 31 + *ip++; } m_pos = op - 1; - m_pos -= le16_to_cpu(get_unaligned( - (const unsigned short *)ip)) >> 2; + m_pos -= get_unaligned_le16(ip) >> 2; ip += 2; } else if (t >= 16) { m_pos = op; @@ -157,8 +156,7 @@ match: } t += 7 + *ip++; } - m_pos -= le16_to_cpu(get_unaligned( - (const unsigned short *)ip)) >> 2; + m_pos -= get_unaligned_le16(ip) >> 2; ip += 2; if (m_pos == op) goto eof_found; diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 485e3040dcd4..35136671b215 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -3,6 +3,9 @@ * * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com> * + * 2008-05-01 rewrite the function and use a ratelimit_state data struct as + * parameter. Now every user can use their own standalone ratelimit_state. + * * This file is released under the GPLv2. * */ @@ -11,41 +14,43 @@ #include <linux/jiffies.h> #include <linux/module.h> +static DEFINE_SPINLOCK(ratelimit_lock); +static unsigned long flags; + /* * __ratelimit - rate limiting - * @ratelimit_jiffies: minimum time in jiffies between two callbacks - * @ratelimit_burst: number of callbacks we do before ratelimiting + * @rs: ratelimit_state data * - * This enforces a rate limit: not more than @ratelimit_burst callbacks - * in every ratelimit_jiffies + * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks + * in every @rs->ratelimit_jiffies */ -int __ratelimit(int ratelimit_jiffies, int ratelimit_burst) +int __ratelimit(struct ratelimit_state *rs) { - static DEFINE_SPINLOCK(ratelimit_lock); - static unsigned toks = 10 * 5 * HZ; - static unsigned long last_msg; - static int missed; - unsigned long flags; - unsigned long now = jiffies; + if (!rs->interval) + return 1; spin_lock_irqsave(&ratelimit_lock, flags); - toks += now - last_msg; - last_msg = now; - if (toks > (ratelimit_burst * ratelimit_jiffies)) - toks = ratelimit_burst * ratelimit_jiffies; - if (toks >= ratelimit_jiffies) { - int lost = missed; + if (!rs->begin) + rs->begin = jiffies; - missed = 0; - toks -= ratelimit_jiffies; - spin_unlock_irqrestore(&ratelimit_lock, flags); - if (lost) - printk(KERN_WARNING "%s: %d messages suppressed\n", - __func__, lost); - return 1; + if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (rs->missed) + printk(KERN_WARNING "%s: %d callbacks suppressed\n", + __func__, rs->missed); + rs->begin = 0; + rs->printed = 0; + rs->missed = 0; } - missed++; + if (rs->burst && rs->burst > rs->printed) + goto print; + + rs->missed++; spin_unlock_irqrestore(&ratelimit_lock, flags); return 0; + +print: + rs->printed++; + spin_unlock_irqrestore(&ratelimit_lock, flags); + return 1; } EXPORT_SYMBOL(__ratelimit); diff --git a/mm/filemap.c b/mm/filemap.c index 7675b91f4f63..2d3ec1ffc66e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; mapping->nrpages--; @@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); } else - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); write_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); } else - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); out: return error; } @@ -2563,9 +2563,8 @@ EXPORT_SYMBOL(generic_file_aio_write); * Otherwise return zero. * * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). + * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). * - * NOTE: @gfp_mask may go away, and this function may become non-blocking. */ int try_to_release_page(struct page *page, gfp_t gfp_mask) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 41341c414194..a8bf4ab01f86 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1026,6 +1026,17 @@ static void __init report_hugepages(void) } } +static unsigned int cpuset_mems_nr(unsigned int *array) +{ + int node; + unsigned int nr = 0; + + for_each_node_mask(node, cpuset_current_mems_allowed) + nr += array[node]; + + return nr; +} + #ifdef CONFIG_SYSCTL #ifdef CONFIG_HIGHMEM static void try_to_free_low(struct hstate *h, unsigned long count) @@ -1375,17 +1386,6 @@ static int __init hugetlb_default_setup(char *s) } __setup("default_hugepagesz=", hugetlb_default_setup); -static unsigned int cpuset_mems_nr(unsigned int *array) -{ - int node; - unsigned int nr = 0; - - for_each_node_mask(node, cpuset_current_mems_allowed) - nr += array[node]; - - return nr; -} - int hugetlb_sysctl_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e46451e1d9b7..fba566c51322 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,9 +35,9 @@ #include <asm/uaccess.h> -struct cgroup_subsys mem_cgroup_subsys; -static const int MEM_CGROUP_RECLAIM_RETRIES = 5; -static struct kmem_cache *page_cgroup_cache; +struct cgroup_subsys mem_cgroup_subsys __read_mostly; +static struct kmem_cache *page_cgroup_cache __read_mostly; +#define MEM_CGROUP_RECLAIM_RETRIES 5 /* * Statistics for memory cgroup. @@ -166,7 +166,6 @@ struct page_cgroup { struct list_head lru; /* per cgroup LRU list */ struct page *page; struct mem_cgroup *mem_cgroup; - int ref_cnt; /* cached, mapped, migrating */ int flags; }; #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ @@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc) enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, MEM_CGROUP_CHARGE_TYPE_MAPPED, + MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ }; /* @@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false); - list_del_init(&pc->lru); + list_del(&pc->lru); } static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, @@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active) struct mem_cgroup_per_zone *mz; unsigned long flags; + if (mem_cgroup_subsys.disabled) + return; + /* * We cannot lock_page_cgroup while holding zone's lru_lock, * because other holders of lock_page_cgroup can be interrupted @@ -524,7 +527,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, * < 0 if the cgroup is over its limit */ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask, enum charge_type ctype) + gfp_t gfp_mask, enum charge_type ctype, + struct mem_cgroup *memcg) { struct mem_cgroup *mem; struct page_cgroup *pc; @@ -532,35 +536,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct mem_cgroup_per_zone *mz; - if (mem_cgroup_subsys.disabled) - return 0; - - /* - * Should page_cgroup's go to their own slab? - * One could optimize the performance of the charging routine - * by saving a bit in the page_flags and using it as a lock - * to see if the cgroup page already has a page_cgroup associated - * with it - */ -retry: - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - /* - * The page_cgroup exists and - * the page has already been accounted. - */ - if (pc) { - VM_BUG_ON(pc->page != page); - VM_BUG_ON(pc->ref_cnt <= 0); - - pc->ref_cnt++; - unlock_page_cgroup(page); - goto done; - } - unlock_page_cgroup(page); - - pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask); - if (pc == NULL) + pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask); + if (unlikely(pc == NULL)) goto err; /* @@ -569,16 +546,18 @@ retry: * thread group leader migrates. It's possible that mm is not * set, if so charge the init_mm (happens for pagecache usage). */ - if (!mm) - mm = &init_mm; - - rcu_read_lock(); - mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); - /* - * For every charge from the cgroup, increment reference count - */ - css_get(&mem->css); - rcu_read_unlock(); + if (likely(!memcg)) { + rcu_read_lock(); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); + /* + * For every charge from the cgroup, increment reference count + */ + css_get(&mem->css); + rcu_read_unlock(); + } else { + mem = memcg; + css_get(&memcg->css); + } while (res_counter_charge(&mem->res, PAGE_SIZE)) { if (!(gfp_mask & __GFP_WAIT)) @@ -603,25 +582,24 @@ retry: } } - pc->ref_cnt = 1; pc->mem_cgroup = mem; pc->page = page; - pc->flags = PAGE_CGROUP_FLAG_ACTIVE; + /* + * If a page is accounted as a page cache, insert to inactive list. + * If anon, insert to active list. + */ if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) pc->flags = PAGE_CGROUP_FLAG_CACHE; + else + pc->flags = PAGE_CGROUP_FLAG_ACTIVE; lock_page_cgroup(page); - if (page_get_page_cgroup(page)) { + if (unlikely(page_get_page_cgroup(page))) { unlock_page_cgroup(page); - /* - * Another charge has been added to this page already. - * We take lock_page_cgroup(page) again and read - * page->cgroup, increment refcnt.... just retry is OK. - */ res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); kmem_cache_free(page_cgroup_cache, pc); - goto retry; + goto done; } page_assign_page_cgroup(page, pc); @@ -642,24 +620,65 @@ err: int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { + if (mem_cgroup_subsys.disabled) + return 0; + + /* + * If already mapped, we don't have to account. + * If page cache, page->mapping has address_space. + * But page->mapping may have out-of-use anon_vma pointer, + * detecit it by PageAnon() check. newly-mapped-anon's page->mapping + * is NULL. + */ + if (page_mapped(page) || (page->mapping && !PageAnon(page))) + return 0; + if (unlikely(!mm)) + mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_MAPPED); + MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); } int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - if (!mm) + if (mem_cgroup_subsys.disabled) + return 0; + + /* + * Corner case handling. This is called from add_to_page_cache() + * in usual. But some FS (shmem) precharges this page before calling it + * and call add_to_page_cache() with GFP_NOWAIT. + * + * For GFP_NOWAIT case, the page may be pre-charged before calling + * add_to_page_cache(). (See shmem.c) check it here and avoid to call + * charge twice. (It works but has to pay a bit larger cost.) + */ + if (!(gfp_mask & __GFP_WAIT)) { + struct page_cgroup *pc; + + lock_page_cgroup(page); + pc = page_get_page_cgroup(page); + if (pc) { + VM_BUG_ON(pc->page != page); + VM_BUG_ON(!pc->mem_cgroup); + unlock_page_cgroup(page); + return 0; + } + unlock_page_cgroup(page); + } + + if (unlikely(!mm)) mm = &init_mm; + return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_CACHE); + MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); } /* - * Uncharging is always a welcome operation, we never complain, simply - * uncharge. + * uncharge if !page_mapped(page) */ -void mem_cgroup_uncharge_page(struct page *page) +static void +__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) { struct page_cgroup *pc; struct mem_cgroup *mem; @@ -674,98 +693,151 @@ void mem_cgroup_uncharge_page(struct page *page) */ lock_page_cgroup(page); pc = page_get_page_cgroup(page); - if (!pc) + if (unlikely(!pc)) goto unlock; VM_BUG_ON(pc->page != page); - VM_BUG_ON(pc->ref_cnt <= 0); - if (--(pc->ref_cnt) == 0) { - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(mz, pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); + if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) + && ((pc->flags & PAGE_CGROUP_FLAG_CACHE) + || page_mapped(page))) + goto unlock; - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); + mz = page_cgroup_zoneinfo(pc); + spin_lock_irqsave(&mz->lru_lock, flags); + __mem_cgroup_remove_list(mz, pc); + spin_unlock_irqrestore(&mz->lru_lock, flags); - mem = pc->mem_cgroup; - res_counter_uncharge(&mem->res, PAGE_SIZE); - css_put(&mem->css); + page_assign_page_cgroup(page, NULL); + unlock_page_cgroup(page); - kmem_cache_free(page_cgroup_cache, pc); - return; - } + mem = pc->mem_cgroup; + res_counter_uncharge(&mem->res, PAGE_SIZE); + css_put(&mem->css); + kmem_cache_free(page_cgroup_cache, pc); + return; unlock: unlock_page_cgroup(page); } +void mem_cgroup_uncharge_page(struct page *page) +{ + __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); +} + +void mem_cgroup_uncharge_cache_page(struct page *page) +{ + VM_BUG_ON(page_mapped(page)); + __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); +} + /* - * Returns non-zero if a page (under migration) has valid page_cgroup member. - * Refcnt of page_cgroup is incremented. + * Before starting migration, account against new page. */ -int mem_cgroup_prepare_migration(struct page *page) +int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) { struct page_cgroup *pc; + struct mem_cgroup *mem = NULL; + enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; + int ret = 0; if (mem_cgroup_subsys.disabled) return 0; lock_page_cgroup(page); pc = page_get_page_cgroup(page); - if (pc) - pc->ref_cnt++; + if (pc) { + mem = pc->mem_cgroup; + css_get(&mem->css); + if (pc->flags & PAGE_CGROUP_FLAG_CACHE) + ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; + } unlock_page_cgroup(page); - return pc != NULL; + if (mem) { + ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL, + ctype, mem); + css_put(&mem->css); + } + return ret; } -void mem_cgroup_end_migration(struct page *page) +/* remove redundant charge if migration failed*/ +void mem_cgroup_end_migration(struct page *newpage) { - mem_cgroup_uncharge_page(page); + /* + * At success, page->mapping is not NULL. + * special rollback care is necessary when + * 1. at migration failure. (newpage->mapping is cleared in this case) + * 2. the newpage was moved but not remapped again because the task + * exits and the newpage is obsolete. In this case, the new page + * may be a swapcache. So, we just call mem_cgroup_uncharge_page() + * always for avoiding mess. The page_cgroup will be removed if + * unnecessary. File cache pages is still on radix-tree. Don't + * care it. + */ + if (!newpage->mapping) + __mem_cgroup_uncharge_common(newpage, + MEM_CGROUP_CHARGE_TYPE_FORCE); + else if (PageAnon(newpage)) + mem_cgroup_uncharge_page(newpage); } /* - * We know both *page* and *newpage* are now not-on-LRU and PG_locked. - * And no race with uncharge() routines because page_cgroup for *page* - * has extra one reference by mem_cgroup_prepare_migration. + * A call to try to shrink memory usage under specified resource controller. + * This is typically used for page reclaiming for shmem for reducing side + * effect of page allocation from shmem, which is used by some mem_cgroup. */ -void mem_cgroup_page_migration(struct page *page, struct page *newpage) +int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) { - struct page_cgroup *pc; - struct mem_cgroup_per_zone *mz; - unsigned long flags; + struct mem_cgroup *mem; + int progress = 0; + int retry = MEM_CGROUP_RECLAIM_RETRIES; - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - if (!pc) { - unlock_page_cgroup(page); - return; - } + if (mem_cgroup_subsys.disabled) + return 0; - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(mz, pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); + rcu_read_lock(); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); + css_get(&mem->css); + rcu_read_unlock(); - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); + do { + progress = try_to_free_mem_cgroup_pages(mem, gfp_mask); + } while (!progress && --retry); - pc->page = newpage; - lock_page_cgroup(newpage); - page_assign_page_cgroup(newpage, pc); + css_put(&mem->css); + if (!retry) + return -ENOMEM; + return 0; +} - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_add_list(mz, pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); +int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val) +{ + + int retry_count = MEM_CGROUP_RECLAIM_RETRIES; + int progress; + int ret = 0; - unlock_page_cgroup(newpage); + while (res_counter_set_limit(&memcg->res, val)) { + if (signal_pending(current)) { + ret = -EINTR; + break; + } + if (!retry_count) { + ret = -EBUSY; + break; + } + progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL); + if (!progress) + retry_count--; + } + return ret; } + /* * This routine traverse page_cgroup in given list and drop them all. - * This routine ignores page_cgroup->ref_cnt. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. */ #define FORCE_UNCHARGE_BATCH (128) @@ -790,12 +862,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, page = pc->page; get_page(page); spin_unlock_irqrestore(&mz->lru_lock, flags); - mem_cgroup_uncharge_page(page); - put_page(page); - if (--count <= 0) { - count = FORCE_UNCHARGE_BATCH; + /* + * Check if this page is on LRU. !LRU page can be found + * if it's under page migration. + */ + if (PageLRU(page)) { + __mem_cgroup_uncharge_common(page, + MEM_CGROUP_CHARGE_TYPE_FORCE); + put_page(page); + if (--count <= 0) { + count = FORCE_UNCHARGE_BATCH; + cond_resched(); + } + } else cond_resched(); - } spin_lock_irqsave(&mz->lru_lock, flags); } spin_unlock_irqrestore(&mz->lru_lock, flags); @@ -810,9 +890,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem) int ret = -EBUSY; int node, zid; - if (mem_cgroup_subsys.disabled) - return 0; - css_get(&mem->css); /* * page reclaim code (kswapd etc..) will move pages between @@ -838,32 +915,34 @@ out: return ret; } -static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) -{ - *tmp = memparse(buf, &buf); - if (*buf != '\0') - return -EINVAL; - - /* - * Round up the value to the closest page size - */ - *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; - return 0; -} - static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, cft->private); } - -static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, - struct file *file, const char __user *userbuf, - size_t nbytes, loff_t *ppos) +/* + * The user of this function is... + * RES_LIMIT. + */ +static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, + const char *buffer) { - return res_counter_write(&mem_cgroup_from_cont(cont)->res, - cft->private, userbuf, nbytes, ppos, - mem_cgroup_write_strategy); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + unsigned long long val; + int ret; + + switch (cft->private) { + case RES_LIMIT: + /* This function does all necessary parse...reuse it */ + ret = res_counter_memparse_write_strategy(buffer, &val); + if (!ret) + ret = mem_cgroup_resize_limit(memcg, val); + break; + default: + ret = -EINVAL; /* should be BUG() ? */ + break; + } + return ret; } static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) @@ -940,7 +1019,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "limit_in_bytes", .private = RES_LIMIT, - .write = mem_cgroup_write, + .write_string = mem_cgroup_write, .read_u64 = mem_cgroup_read, }, { @@ -1070,8 +1149,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss, static int mem_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) { - if (mem_cgroup_subsys.disabled) - return 0; return cgroup_add_files(cont, ss, mem_cgroup_files, ARRAY_SIZE(mem_cgroup_files)); } @@ -1084,9 +1161,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct mm_struct *mm; struct mem_cgroup *mem, *old_mem; - if (mem_cgroup_subsys.disabled) - return; - mm = get_task_mm(p); if (mm == NULL) return; diff --git a/mm/migrate.c b/mm/migrate.c index 376cceba82f9..d8c65a65c61d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -358,6 +358,9 @@ static int migrate_page_move_mapping(struct address_space *mapping, __inc_zone_page_state(newpage, NR_FILE_PAGES); write_unlock_irq(&mapping->tree_lock); + if (!PageSwapCache(newpage)) { + mem_cgroup_uncharge_cache_page(page); + } return 0; } @@ -611,7 +614,6 @@ static int move_to_new_page(struct page *newpage, struct page *page) rc = fallback_migrate_page(mapping, newpage, page); if (!rc) { - mem_cgroup_page_migration(page, newpage); remove_migration_ptes(page, newpage); } else newpage->mapping = NULL; @@ -641,6 +643,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, /* page was freed from under us. So we are done. */ goto move_newpage; + charge = mem_cgroup_prepare_migration(page, newpage); + if (charge == -ENOMEM) { + rc = -ENOMEM; + goto move_newpage; + } + /* prepare cgroup just returns 0 or -ENOMEM */ + BUG_ON(charge); + rc = -EAGAIN; if (TestSetPageLocked(page)) { if (!force) @@ -692,19 +702,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, goto rcu_unlock; } - charge = mem_cgroup_prepare_migration(page); /* Establish migration ptes or remove ptes */ try_to_unmap(page, 1); if (!page_mapped(page)) rc = move_to_new_page(newpage, page); - if (rc) { + if (rc) remove_migration_ptes(page, page); - if (charge) - mem_cgroup_end_migration(page); - } else if (charge) - mem_cgroup_end_migration(newpage); rcu_unlock: if (rcu_locked) rcu_read_unlock(); @@ -725,6 +730,8 @@ unlock: } move_newpage: + if (!charge) + mem_cgroup_end_migration(newpage); /* * Move the new page to the LRU. If migration was not successful * then this will free the page. diff --git a/mm/pdflush.c b/mm/pdflush.c index 9d834aa4b979..0cbe0c60c6bf 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -130,7 +130,7 @@ static int __pdflush(struct pdflush_work *my_work) * Thread creation: For how long have there been zero * available threads? */ - if (jiffies - last_empty_jifs > 1 * HZ) { + if (time_after(jiffies, last_empty_jifs + 1 * HZ)) { /* unlocked list_empty() test is OK here */ if (list_empty(&pdflush_list)) { /* unlocked test is OK here */ @@ -151,7 +151,7 @@ static int __pdflush(struct pdflush_work *my_work) if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS) continue; pdf = list_entry(pdflush_list.prev, struct pdflush_work, list); - if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) { + if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) { /* Limit exit rate */ pdf->when_i_went_to_sleep = jiffies; break; /* exeunt */ diff --git a/mm/rmap.c b/mm/rmap.c index bf0a5b7cfb8e..abbd29f7c43f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page, VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); if (atomic_inc_and_test(&page->_mapcount)) __page_set_anon_rmap(page, vma, address); - else { + else __page_check_anon_rmap(page, vma, address); - /* - * We unconditionally charged during prepare, we uncharge here - * This takes care of balancing the reference counts - */ - mem_cgroup_uncharge_page(page); - } } /** @@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page) { if (atomic_inc_and_test(&page->_mapcount)) __inc_zone_page_state(page, NR_FILE_MAPPED); - else - /* - * We unconditionally charged during prepare, we uncharge here - * This takes care of balancing the reference counts - */ - mem_cgroup_uncharge_page(page); } #ifdef CONFIG_DEBUG_VM diff --git a/mm/shmem.c b/mm/shmem.c index 9ffbea9b79e1..f92fea94d037 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -922,20 +922,26 @@ found: error = 1; if (!inode) goto out; - /* Precharge page while we can wait, compensate afterwards */ + /* Precharge page using GFP_KERNEL while we can wait */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); - if (error) - goto uncharge; + if (error) { + mem_cgroup_uncharge_cache_page(page); + goto out; + } error = 1; spin_lock(&info->lock); ptr = shmem_swp_entry(info, idx, NULL); - if (ptr && ptr->val == entry.val) + if (ptr && ptr->val == entry.val) { error = add_to_page_cache(page, inode->i_mapping, idx, GFP_NOWAIT); + /* does mem_cgroup_uncharge_cache_page on error */ + } else /* we must compensate for our precharge above */ + mem_cgroup_uncharge_cache_page(page); + if (error == -EEXIST) { struct page *filepage = find_get_page(inode->i_mapping, idx); error = 1; @@ -961,8 +967,6 @@ found: shmem_swp_unmap(ptr); spin_unlock(&info->lock); radix_tree_preload_end(); -uncharge: - mem_cgroup_uncharge_page(page); out: unlock_page(page); page_cache_release(page); @@ -1311,17 +1315,14 @@ repeat: shmem_swp_unmap(entry); spin_unlock(&info->lock); unlock_page(swappage); + page_cache_release(swappage); if (error == -ENOMEM) { /* allow reclaim from this memory cgroup */ - error = mem_cgroup_cache_charge(swappage, - current->mm, gfp & ~__GFP_HIGHMEM); - if (error) { - page_cache_release(swappage); + error = mem_cgroup_shrink_usage(current->mm, + gfp); + if (error) goto failed; - } - mem_cgroup_uncharge_page(swappage); } - page_cache_release(swappage); goto repeat; } } else if (sgp == SGP_READ && !filepage) { @@ -1358,6 +1359,8 @@ repeat: } if (!filepage) { + int ret; + spin_unlock(&info->lock); filepage = shmem_alloc_page(gfp, info, idx); if (!filepage) { @@ -1386,10 +1389,18 @@ repeat: swap = *entry; shmem_swp_unmap(entry); } - if (error || swap.val || 0 != add_to_page_cache_lru( - filepage, mapping, idx, GFP_NOWAIT)) { + ret = error || swap.val; + if (ret) + mem_cgroup_uncharge_cache_page(filepage); + else + ret = add_to_page_cache_lru(filepage, mapping, + idx, GFP_NOWAIT); + /* + * At add_to_page_cache_lru() failure, uncharge will + * be done automatically. + */ + if (ret) { spin_unlock(&info->lock); - mem_cgroup_uncharge_page(filepage); page_cache_release(filepage); shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); @@ -1398,7 +1409,6 @@ repeat: goto failed; goto repeat; } - mem_cgroup_uncharge_page(filepage); info->flags |= SHMEM_PAGEIN; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 967d30ccd92b..26672c6cd3ce 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -38,6 +38,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/memcontrol.h> +#include <linux/delayacct.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -1316,6 +1317,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct zone *zone; enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); + delayacct_freepages_start(); + if (scan_global_lru(sc)) count_vm_event(ALLOCSTALL); /* @@ -1396,6 +1399,8 @@ out: } else mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority); + delayacct_freepages_end(); + return ret; } diff --git a/net/802/psnap.c b/net/802/psnap.c index ea4643931446..b3cfe5a14fca 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -31,11 +31,9 @@ static struct llc_sap *snap_sap; */ static struct datalink_proto *find_snap_client(unsigned char *desc) { - struct list_head *entry; struct datalink_proto *proto = NULL, *p; - list_for_each_rcu(entry, &snap_list) { - p = list_entry(entry, struct datalink_proto, node); + list_for_each_entry_rcu(p, &snap_list, node) { if (!memcmp(p->type, desc, 5)) { proto = p; break; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a570e2af22cb..f686467ff12b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", - .data = &net_msg_cost, + .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", - .data = &net_msg_burst, + .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/net/core/utils.c b/net/core/utils.c index 8031eb59054e..72e0ebe964a0 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -31,17 +31,16 @@ #include <asm/system.h> #include <asm/uaccess.h> -int net_msg_cost __read_mostly = 5*HZ; -int net_msg_burst __read_mostly = 10; int net_msg_warn __read_mostly = 1; EXPORT_SYMBOL(net_msg_warn); +DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { - return __printk_ratelimit(net_msg_cost, net_msg_burst); + return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dd919d84285f..f440a9f54924 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -264,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol) static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; @@ -281,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -302,10 +300,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (unlikely(answer == NULL)) { + if (unlikely(err)) { if (try_loading_module < 2) { rcu_read_unlock(); /* diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3d828bc4b1cf..60461ad7fa6f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -83,7 +83,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; @@ -97,13 +96,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) build_ehash_secret(); /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw6[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -118,10 +116,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (!answer) { + if (err) { if (try_loading_module < 2) { rcu_read_unlock(); /* diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 007c1a6708ee..63ada437fc2f 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -35,8 +35,22 @@ net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) return &namespaces->net_ns->sysctl_table_headers; } +/* Return standard mode bits for table entry. */ +static int net_ctl_permissions(struct ctl_table_root *root, + struct nsproxy *nsproxy, + struct ctl_table *table) +{ + /* Allow network administrator to have same access as root. */ + if (capable(CAP_NET_ADMIN)) { + int mode = (table->mode >> 6) & 7; + return (mode << 6) | (mode << 3) | mode; + } + return table->mode; +} + static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, + .permissions = net_ctl_permissions, }; static LIST_HEAD(net_sysctl_ro_tables); diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 340ad6920511..3eca62566d6b 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -26,12 +26,17 @@ # $& (whole re) matches the complete objdump line with the stack growth # $1 (first bracket) matches the size of the stack growth # +# $dre is similar, but for dynamic stack redutions: +# $& (whole re) matches the complete objdump line with the stack growth +# $1 (first bracket) matches the dynamic amount of the stack growth +# # use anything else and feel the pain ;) -my (@stack, $re, $x, $xs); +my (@stack, $re, $dre, $x, $xs); { my $arch = shift; if ($arch eq "") { $arch = `uname -m`; + chomp($arch); } $x = "[0-9a-f]"; # hex character @@ -46,9 +51,11 @@ my (@stack, $re, $x, $xs); } elsif ($arch =~ /^i[3456]86$/) { #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o; + $dre = qr/^.*[as][du][db] (%.*),\%esp$/o; } elsif ($arch eq 'x86_64') { # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o; + $dre = qr/^.*[as][du][db] (\%.*),\%rsp$/o; } elsif ($arch eq 'ia64') { #e0000000044011fc: 01 0f fc 8c adds r12=-384,r12 $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o; @@ -85,7 +92,7 @@ my (@stack, $re, $x, $xs); # 0: 00 e8 38 01 LINK 0x4e0; $re = qr/.*[[:space:]]LINK[[:space:]]*(0x$x{1,8})/o; } else { - print("wrong or unknown architecture\n"); + print("wrong or unknown architecture \"$arch\"\n"); exit } } @@ -141,6 +148,22 @@ while (my $line = <STDIN>) { next if ($size < 100); push @stack, "$intro$size\n"; } + elsif (defined $dre && $line =~ m/$dre/) { + my $size = "Dynamic ($1)"; + + next if $line !~ m/^($xs*)/; + my $addr = $1; + $addr =~ s/ /0/g; + $addr = "0x$addr"; + + my $intro = "$addr $func [$file]:"; + my $padlen = 56 - length($intro); + while ($padlen > 0) { + $intro .= ' '; + $padlen -= 8; + } + push @stack, "$intro$size\n"; + } } print sort bysize @stack; diff --git a/security/device_cgroup.c b/security/device_cgroup.c index ddd92cec78ed..7bd296cca041 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -41,6 +41,7 @@ struct dev_whitelist_item { short type; short access; struct list_head list; + struct rcu_head rcu; }; struct dev_cgroup { @@ -59,6 +60,11 @@ static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); } +static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) +{ + return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); +} + struct cgroup_subsys devices_subsys; static int devcgroup_can_attach(struct cgroup_subsys *ss, @@ -128,11 +134,19 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, } if (whcopy != NULL) - list_add_tail(&whcopy->list, &dev_cgroup->whitelist); + list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); spin_unlock(&dev_cgroup->lock); return 0; } +static void whitelist_item_free(struct rcu_head *rcu) +{ + struct dev_whitelist_item *item; + + item = container_of(rcu, struct dev_whitelist_item, rcu); + kfree(item); +} + /* * called under cgroup_lock() * since the list is visible to other tasks, we need the spinlock also @@ -156,8 +170,8 @@ static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, remove: walk->access &= ~wh->access; if (!walk->access) { - list_del(&walk->list); - kfree(walk); + list_del_rcu(&walk->list); + call_rcu(&walk->rcu, whitelist_item_free); } } spin_unlock(&dev_cgroup->lock); @@ -188,7 +202,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, } wh->minor = wh->major = ~0; wh->type = DEV_ALL; - wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE; + wh->access = ACC_MASK; list_add(&wh->list, &dev_cgroup->whitelist); } else { parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); @@ -250,11 +264,10 @@ static char type_to_char(short type) static void set_majmin(char *str, unsigned m) { - memset(str, 0, MAJMINLEN); if (m == ~0) - sprintf(str, "*"); + strcpy(str, "*"); else - snprintf(str, MAJMINLEN, "%u", m); + sprintf(str, "%u", m); } static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, @@ -264,15 +277,15 @@ static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, struct dev_whitelist_item *wh; char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; - spin_lock(&devcgroup->lock); - list_for_each_entry(wh, &devcgroup->whitelist, list) { + rcu_read_lock(); + list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { set_access(acc, wh->access); set_majmin(maj, wh->major); set_majmin(min, wh->minor); seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), maj, min, acc); } - spin_unlock(&devcgroup->lock); + rcu_read_unlock(); return 0; } @@ -312,10 +325,10 @@ static int may_access_whitelist(struct dev_cgroup *c, * when adding a new allow rule to a device whitelist, the rule * must be allowed in the parent device */ -static int parent_has_perm(struct cgroup *childcg, +static int parent_has_perm(struct dev_cgroup *childcg, struct dev_whitelist_item *wh) { - struct cgroup *pcg = childcg->parent; + struct cgroup *pcg = childcg->css.cgroup->parent; struct dev_cgroup *parent; int ret; @@ -341,39 +354,19 @@ static int parent_has_perm(struct cgroup *childcg, * new access is only allowed if you're in the top-level cgroup, or your * parent cgroup has the access you're asking for. */ -static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, - struct file *file, const char __user *userbuf, - size_t nbytes, loff_t *ppos) +static int devcgroup_update_access(struct dev_cgroup *devcgroup, + int filetype, const char *buffer) { - struct cgroup *cur_cgroup; - struct dev_cgroup *devcgroup, *cur_devcgroup; - int filetype = cft->private; - char *buffer, *b; + struct dev_cgroup *cur_devcgroup; + const char *b; + char *endp; int retval = 0, count; struct dev_whitelist_item wh; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - devcgroup = cgroup_to_devcgroup(cgroup); - cur_cgroup = task_cgroup(current, devices_subsys.subsys_id); - cur_devcgroup = cgroup_to_devcgroup(cur_cgroup); - - buffer = kmalloc(nbytes+1, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - if (copy_from_user(buffer, userbuf, nbytes)) { - retval = -EFAULT; - goto out1; - } - buffer[nbytes] = 0; /* nul-terminate */ - - cgroup_lock(); - if (cgroup_is_removed(cgroup)) { - retval = -ENODEV; - goto out2; - } + cur_devcgroup = task_devcgroup(current); memset(&wh, 0, sizeof(wh)); b = buffer; @@ -392,32 +385,23 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, wh.type = DEV_CHAR; break; default: - retval = -EINVAL; - goto out2; + return -EINVAL; } b++; - if (!isspace(*b)) { - retval = -EINVAL; - goto out2; - } + if (!isspace(*b)) + return -EINVAL; b++; if (*b == '*') { wh.major = ~0; b++; } else if (isdigit(*b)) { - wh.major = 0; - while (isdigit(*b)) { - wh.major = wh.major*10+(*b-'0'); - b++; - } + wh.major = simple_strtoul(b, &endp, 10); + b = endp; } else { - retval = -EINVAL; - goto out2; - } - if (*b != ':') { - retval = -EINVAL; - goto out2; + return -EINVAL; } + if (*b != ':') + return -EINVAL; b++; /* read minor */ @@ -425,19 +409,13 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, wh.minor = ~0; b++; } else if (isdigit(*b)) { - wh.minor = 0; - while (isdigit(*b)) { - wh.minor = wh.minor*10+(*b-'0'); - b++; - } + wh.minor = simple_strtoul(b, &endp, 10); + b = endp; } else { - retval = -EINVAL; - goto out2; - } - if (!isspace(*b)) { - retval = -EINVAL; - goto out2; + return -EINVAL; } + if (!isspace(*b)) + return -EINVAL; for (b++, count = 0; count < 3; count++, b++) { switch (*b) { case 'r': @@ -454,8 +432,7 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, count = 3; break; default: - retval = -EINVAL; - goto out2; + return -EINVAL; } } @@ -463,38 +440,39 @@ handle: retval = 0; switch (filetype) { case DEVCG_ALLOW: - if (!parent_has_perm(cgroup, &wh)) - retval = -EPERM; - else - retval = dev_whitelist_add(devcgroup, &wh); - break; + if (!parent_has_perm(devcgroup, &wh)) + return -EPERM; + return dev_whitelist_add(devcgroup, &wh); case DEVCG_DENY: dev_whitelist_rm(devcgroup, &wh); break; default: - retval = -EINVAL; - goto out2; + return -EINVAL; } + return 0; +} - if (retval == 0) - retval = nbytes; - -out2: +static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, + const char *buffer) +{ + int retval; + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), + cft->private, buffer); cgroup_unlock(); -out1: - kfree(buffer); return retval; } static struct cftype dev_cgroup_files[] = { { .name = "allow", - .write = devcgroup_access_write, + .write_string = devcgroup_access_write, .private = DEVCG_ALLOW, }, { .name = "deny", - .write = devcgroup_access_write, + .write_string = devcgroup_access_write, .private = DEVCG_DENY, }, { @@ -535,8 +513,8 @@ int devcgroup_inode_permission(struct inode *inode, int mask) if (!dev_cgroup) return 0; - spin_lock(&dev_cgroup->lock); - list_for_each_entry(wh, &dev_cgroup->whitelist, list) { + rcu_read_lock(); + list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { if (wh->type & DEV_ALL) goto acc_check; if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) @@ -552,10 +530,10 @@ acc_check: continue; if ((mask & MAY_READ) && !(wh->access & ACC_READ)) continue; - spin_unlock(&dev_cgroup->lock); + rcu_read_unlock(); return 0; } - spin_unlock(&dev_cgroup->lock); + rcu_read_unlock(); return -EPERM; } @@ -570,7 +548,7 @@ int devcgroup_inode_mknod(int mode, dev_t dev) if (!dev_cgroup) return 0; - spin_lock(&dev_cgroup->lock); + rcu_read_lock(); list_for_each_entry(wh, &dev_cgroup->whitelist, list) { if (wh->type & DEV_ALL) goto acc_check; @@ -585,9 +563,9 @@ int devcgroup_inode_mknod(int mode, dev_t dev) acc_check: if (!(wh->access & ACC_MKNOD)) continue; - spin_unlock(&dev_cgroup->lock); + rcu_read_unlock(); return 0; } - spin_unlock(&dev_cgroup->lock); + rcu_read_unlock(); return -EPERM; } |