diff options
301 files changed, 16308 insertions, 8580 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index b1eb661e6302..9632444f6c62 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ mac80211.xml debugobjects.xml sh.xml regulator.xml \ - alsa-driver-api.xml writing-an-alsa-driver.xml + alsa-driver-api.xml writing-an-alsa-driver.xml \ + tracepoint.xml ### # The build process is as follows (targets): diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl new file mode 100644 index 000000000000..b0756d0fd579 --- /dev/null +++ b/Documentation/DocBook/tracepoint.tmpl @@ -0,0 +1,89 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" + "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> + +<book id="Tracepoints"> + <bookinfo> + <title>The Linux Kernel Tracepoint API</title> + + <authorgroup> + <author> + <firstname>Jason</firstname> + <surname>Baron</surname> + <affiliation> + <address> + <email>jbaron@redhat.com</email> + </address> + </affiliation> + </author> + </authorgroup> + + <legalnotice> + <para> + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later + version. + </para> + + <para> + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + </para> + + <para> + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + </para> + + <para> + For more details see the file COPYING in the source + distribution of Linux. + </para> + </legalnotice> + </bookinfo> + + <toc></toc> + <chapter id="intro"> + <title>Introduction</title> + <para> + Tracepoints are static probe points that are located in strategic points + throughout the kernel. 'Probes' register/unregister with tracepoints + via a callback mechanism. The 'probes' are strictly typed functions that + are passed a unique set of parameters defined by each tracepoint. + </para> + + <para> + From this simple callback mechanism, 'probes' can be used to profile, debug, + and understand kernel behavior. There are a number of tools that provide a + framework for using 'probes'. These tools include Systemtap, ftrace, and + LTTng. + </para> + + <para> + Tracepoints are defined in a number of header files via various macros. Thus, + the purpose of this document is to provide a clear accounting of the available + tracepoints. The intention is to understand not only what tracepoints are + available but also to understand where future tracepoints might be added. + </para> + + <para> + The API presented has functions of the form: + <function>trace_tracepointname(function parameters)</function>. These are the + tracepoints callbacks that are found throughout the code. Registering and + unregistering probes with these callback sites is covered in the + <filename>Documentation/trace/*</filename> directory. + </para> + </chapter> + + <chapter id="irq"> + <title>IRQ</title> +!Iinclude/trace/events/irq.h + </chapter> + +</book> diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 068848240a8b..02cced183b2d 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy). The output of "cat rcu/rcudata" looks as follows: rcu: - 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 - 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 - 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 - 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 - 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 - 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 - 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 - 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 +rcu: + 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10 + 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10 + 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10 + 3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10 + 4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10 + 5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10 + 6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10 + 7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10 rcu_bh: - 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 - 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 - 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 - 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 - 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 - 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 - 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 - 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 + 0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10 + 1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10 + 2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10 + 4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 The first section lists the rcu_data structures for rcu, the second for rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. @@ -253,12 +254,6 @@ o "pqc" indicates which grace period the last-observed quiescent o "qp" indicates that RCU still expects a quiescent state from this CPU. -o "rpfq" is the number of rcu_pending() calls on this CPU required - to induce this CPU to invoke force_quiescent_state(). - -o "rp" is low-order four hex digits of the count of how many times - rcu_pending() has been invoked on this CPU. - o "dt" is the current value of the dyntick counter that is incremented when entering or leaving dynticks idle state, either by the scheduler or by irq. The number after the "/" is the interrupt @@ -305,6 +300,9 @@ o "b" is the batch limit for this CPU. If more than this number of RCU callbacks is ready to invoke, then the remainder will be deferred. +There is also an rcu/rcudata.csv file with the same information in +comma-separated-variable spreadsheet format. + The output of "cat rcu/rcugp" looks as follows: @@ -411,3 +409,63 @@ o Each element of the form "1/1 0:127 ^0" represents one struct For example, the first entry at the lowest level shows "^0", indicating that it corresponds to bit zero in the first entry at the middle level. + + +The output of "cat rcu/rcu_pending" looks as follows: + +rcu: + 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 + 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 + 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 + 3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 + 4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 + 5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 + 6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 + 7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 +rcu_bh: + 0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 + 1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 + 2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 + 3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 + 4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 + 5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 + 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 + 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 + +As always, this is once again split into "rcu" and "rcu_bh" portions. +The fields are as follows: + +o "np" is the number of times that __rcu_pending() has been invoked + for the corresponding flavor of RCU. + +o "qsp" is the number of times that the RCU was waiting for a + quiescent state from this CPU. + +o "cbr" is the number of times that this CPU had RCU callbacks + that had passed through a grace period, and were thus ready + to be invoked. + +o "cng" is the number of times that this CPU needed another + grace period while RCU was idle. + +o "gpc" is the number of times that an old grace period had + completed, but this CPU was not yet aware of it. + +o "gps" is the number of times that a new grace period had started, + but this CPU was not yet aware of it. + +o "nf" is the number of times that this CPU suspected that the + current grace period had run for too long, and thus needed to + be forced. + + Please note that "forcing" consists of sending resched IPIs + to holdout CPUs. If that CPU really still is in an old RCU + read-side critical section, then we really do have to wait for it. + The assumption behing "forcing" is that the CPU is not still in + an old RCU read-side critical section, but has not yet responded + for some other reason. + +o "nn" is the number of times that this CPU needed nothing. Alert + readers will note that the rcu "nn" number for a given CPU very + closely matches the rcu_bh "np" number for that same CPU. This + is due to short-circuit evaluation in rcu_pending(). diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a5253f6d01af..72d3bf08d79b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -56,7 +56,6 @@ parameter is applicable: ISAPNP ISA PnP code is enabled. ISDN Appropriate ISDN support is enabled. JOY Appropriate joystick support is enabled. - KMEMTRACE kmemtrace is enabled. LIBATA Libata driver is enabled LP Printer support is enabled. LOOP Loopback device support is enabled. @@ -754,12 +753,25 @@ and is between 256 and 4096 characters. It is defined in the file ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. ftrace=[tracer] - [ftrace] will set and start the specified tracer + [FTRACE] will set and start the specified tracer as early as possible in order to facilitate early boot debugging. ftrace_dump_on_oops - [ftrace] will dump the trace buffers on oops. + [FTRACE] will dump the trace buffers on oops. + + ftrace_filter=[function-list] + [FTRACE] Limit the functions traced by the function + tracer at boot up. function-list is a comma separated + list of functions. This list can be changed at run + time by the set_ftrace_filter file in the debugfs + tracing directory. + + ftrace_notrace=[function-list] + [FTRACE] Do not trace the functions specified in + function-list. This list can be changed at run time + by the set_ftrace_notrace file in the debugfs + tracing directory. gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad @@ -1062,15 +1074,6 @@ and is between 256 and 4096 characters. It is defined in the file use the HighMem zone if it exists, and the Normal zone if it does not. - kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no } - Controls whether kmemtrace is enabled - at boot-time. - - kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of - subbufs kmemtrace's relay channel has. Set this - higher than default (KMEMTRACE_N_SUBBUFS in code) if - you experience buffer overruns. - kgdboc= [HW] kgdb over consoles. Requires a tty driver that supports console polling. (only serial suported for now) @@ -1671,6 +1674,14 @@ and is between 256 and 4096 characters. It is defined in the file oprofile.timer= [HW] Use timer interrupt instead of performance counters + oprofile.cpu_type= Force an oprofile cpu type + This might be useful if you have an older oprofile + userland or if you want common events. + Format: { archperfmon } + archperfmon: [X86] Force use of architectural + perfmon on Intel CPUs instead of the + CPU specific event set. + osst= [HW,SCSI] SCSI Tape Driver Format: <buffer_size>,<write_threshold> See also Documentation/scsi/st.txt. diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt new file mode 100644 index 000000000000..f157d7594ea7 --- /dev/null +++ b/Documentation/trace/events.txt @@ -0,0 +1,90 @@ + Event Tracing + + Documentation written by Theodore Ts'o + Updated by Li Zefan + +1. Introduction +=============== + +Tracepoints (see Documentation/trace/tracepoints.txt) can be used +without creating custom kernel modules to register probe functions +using the event tracing infrastructure. + +Not all tracepoints can be traced using the event tracing system; +the kernel developer must provide code snippets which define how the +tracing information is saved into the tracing buffer, and how the +tracing information should be printed. + +2. Using Event Tracing +====================== + +2.1 Via the 'set_event' interface +--------------------------------- + +The events which are available for tracing can be found in the file +/debug/tracing/available_events. + +To enable a particular event, such as 'sched_wakeup', simply echo it +to /debug/tracing/set_event. For example: + + # echo sched_wakeup >> /debug/tracing/set_event + +[ Note: '>>' is necessary, otherwise it will firstly disable + all the events. ] + +To disable an event, echo the event name to the set_event file prefixed +with an exclamation point: + + # echo '!sched_wakeup' >> /debug/tracing/set_event + +To disable all events, echo an empty line to the set_event file: + + # echo > /debug/tracing/set_event + +To enable all events, echo '*:*' or '*:' to the set_event file: + + # echo *:* > /debug/tracing/set_event + +The events are organized into subsystems, such as ext4, irq, sched, +etc., and a full event name looks like this: <subsystem>:<event>. The +subsystem name is optional, but it is displayed in the available_events +file. All of the events in a subsystem can be specified via the syntax +"<subsystem>:*"; for example, to enable all irq events, you can use the +command: + + # echo 'irq:*' > /debug/tracing/set_event + +2.2 Via the 'enable' toggle +--------------------------- + +The events available are also listed in /debug/tracing/events/ hierarchy +of directories. + +To enable event 'sched_wakeup': + + # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable + +To disable it: + + # echo 0 > /debug/tracing/events/sched/sched_wakeup/enable + +To enable all events in sched subsystem: + + # echo 1 > /debug/tracing/events/sched/enable + +To eanble all events: + + # echo 1 > /debug/tracing/events/enable + +When reading one of these enable files, there are four results: + + 0 - all events this file affects are disabled + 1 - all events this file affects are enabled + X - there is a mixture of events enabled and disabled + ? - this file does not affect any event + +3. Defining an event-enabled tracepoint +======================================= + +See The example provided in samples/trace_events + diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index e362f50c496f..2a82d8602944 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -179,7 +179,7 @@ Here is the list of current tracers that may be configured. Function call tracer to trace all kernel functions. - "function_graph_tracer" + "function_graph" Similar to the function tracer except that the function tracer probes the functions on their entry diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt new file mode 100644 index 000000000000..cd805e16dc27 --- /dev/null +++ b/Documentation/trace/power.txt @@ -0,0 +1,17 @@ +The power tracer collects detailed information about C-state and P-state +transitions, instead of just looking at the high-level "average" +information. + +There is a helper script found in scrips/tracing/power.pl in the kernel +sources which can be used to parse this information and create a +Scalable Vector Graphics (SVG) picture from the trace data. + +To use this tracer: + + echo 0 > /sys/kernel/debug/tracing/tracing_enabled + echo power > /sys/kernel/debug/tracing/current_tracer + echo 1 > /sys/kernel/debug/tracing/tracing_enabled + sleep 1 + echo 0 > /sys/kernel/debug/tracing/tracing_enabled + cat /sys/kernel/debug/tracing/trace | \ + perl scripts/tracing/power.pl > out.sv diff --git a/MAINTAINERS b/MAINTAINERS index cf4abddfc8a4..84285b5ba359 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -71,7 +71,7 @@ P: Person M: Mail patches to L: Mailing list that is relevant to this area W: Web-page with status/info -T: SCM tree type and location. Type is one of: git, hg, quilt. +T: SCM tree type and location. Type is one of: git, hg, quilt, stgit. S: Status, one of the following: Supported: Someone is actually paid to look after this. @@ -159,7 +159,8 @@ F: drivers/net/r8169.c 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER L: linux-serial@vger.kernel.org W: http://serial.sourceforge.net -S: Orphan +M: alan@lxorguk.ukuu.org.uk +S: Odd Fixes F: drivers/serial/8250* F: include/linux/serial_8250.h @@ -5629,6 +5630,7 @@ P: Alan Cox M: alan@lxorguk.ukuu.org.uk L: linux-kernel@vger.kernel.org S: Maintained +T: stgit http://zeniv.linux.org.uk/~alan/ttydev/ TULIP NETWORK DRIVERS P: Grant Grundler diff --git a/arch/arm/plat-mxc/include/mach/imx-uart.h b/arch/arm/plat-mxc/include/mach/imx-uart.h index 599217b2e13f..f9bd17dd8dd7 100644 --- a/arch/arm/plat-mxc/include/mach/imx-uart.h +++ b/arch/arm/plat-mxc/include/mach/imx-uart.h @@ -20,11 +20,16 @@ #define ASMARM_ARCH_UART_H #define IMXUART_HAVE_RTSCTS (1<<0) +#define IMXUART_IRDA (1<<1) struct imxuart_platform_data { int (*init)(struct platform_device *pdev); int (*exit)(struct platform_device *pdev); unsigned int flags; + void (*irda_enable)(int enable); + unsigned int irda_inv_rx:1; + unsigned int irda_inv_tx:1; + unsigned short transceiver_delay; }; #endif diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 9d1552a9ee2c..8a5bd7a9c6f5 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -6,6 +6,7 @@ config FRV bool default y select HAVE_IDE + select HAVE_ARCH_TRACEHOOK config ZONE_DMA bool diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h index 287f6f697ce2..50ae91b29674 100644 --- a/arch/frv/include/asm/bitops.h +++ b/arch/frv/include/asm/bitops.h @@ -112,7 +112,7 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig #define atomic_clear_mask(mask, v) atomic_test_and_ANDNOT_mask((mask), (v)) #define atomic_set_mask(mask, v) atomic_test_and_OR_mask((mask), (v)) -static inline int test_and_clear_bit(int nr, volatile void *addr) +static inline int test_and_clear_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *ptr = addr; unsigned long mask = 1UL << (nr & 31); @@ -120,7 +120,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0; } -static inline int test_and_set_bit(int nr, volatile void *addr) +static inline int test_and_set_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *ptr = addr; unsigned long mask = 1UL << (nr & 31); @@ -128,7 +128,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0; } -static inline int test_and_change_bit(int nr, volatile void *addr) +static inline int test_and_change_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *ptr = addr; unsigned long mask = 1UL << (nr & 31); @@ -136,22 +136,22 @@ static inline int test_and_change_bit(int nr, volatile void *addr) return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0; } -static inline void clear_bit(int nr, volatile void *addr) +static inline void clear_bit(unsigned long nr, volatile void *addr) { test_and_clear_bit(nr, addr); } -static inline void set_bit(int nr, volatile void *addr) +static inline void set_bit(unsigned long nr, volatile void *addr) { test_and_set_bit(nr, addr); } -static inline void change_bit(int nr, volatile void * addr) +static inline void change_bit(unsigned long nr, volatile void *addr) { test_and_change_bit(nr, addr); } -static inline void __clear_bit(int nr, volatile void * addr) +static inline void __clear_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask; @@ -161,7 +161,7 @@ static inline void __clear_bit(int nr, volatile void * addr) *a &= ~mask; } -static inline void __set_bit(int nr, volatile void * addr) +static inline void __set_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask; @@ -171,7 +171,7 @@ static inline void __set_bit(int nr, volatile void * addr) *a |= mask; } -static inline void __change_bit(int nr, volatile void *addr) +static inline void __change_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask; @@ -181,7 +181,7 @@ static inline void __change_bit(int nr, volatile void *addr) *a ^= mask; } -static inline int __test_and_clear_bit(int nr, volatile void * addr) +static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask, retval; @@ -193,7 +193,7 @@ static inline int __test_and_clear_bit(int nr, volatile void * addr) return retval; } -static inline int __test_and_set_bit(int nr, volatile void * addr) +static inline int __test_and_set_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask, retval; @@ -205,7 +205,7 @@ static inline int __test_and_set_bit(int nr, volatile void * addr) return retval; } -static inline int __test_and_change_bit(int nr, volatile void * addr) +static inline int __test_and_change_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask, retval; @@ -220,12 +220,13 @@ static inline int __test_and_change_bit(int nr, volatile void * addr) /* * This routine doesn't need to be atomic. */ -static inline int __constant_test_bit(int nr, const volatile void * addr) +static inline int +__constant_test_bit(unsigned long nr, const volatile void *addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } -static inline int __test_bit(int nr, const volatile void * addr) +static inline int __test_bit(unsigned long nr, const volatile void *addr) { int * a = (int *) addr; int mask; diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h index 7279ec07d62e..7bbf6e47f8c8 100644 --- a/arch/frv/include/asm/elf.h +++ b/arch/frv/include/asm/elf.h @@ -116,6 +116,7 @@ do { \ } while(0) #define USE_ELF_CORE_DUMP +#define CORE_DUMP_USE_REGSET #define ELF_FDPIC_CORE_EFLAGS EF_FRV_FDPIC #define ELF_EXEC_PAGESIZE 16384 diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h index 585d9b49949a..cc685e60b0f9 100644 --- a/arch/frv/include/asm/pci.h +++ b/arch/frv/include/asm/pci.h @@ -87,8 +87,7 @@ static inline void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle), (unsigned long)bus_to_virt(dma_handle) + size); @@ -105,9 +104,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, int nelems, int direction) { int i; - - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); for (i = 0; i < nelems; i++) frv_cache_wback_inv(sg_dma_address(&sg[i]), diff --git a/arch/frv/include/asm/ptrace.h b/arch/frv/include/asm/ptrace.h index cf6934012b64..a54b535c9e49 100644 --- a/arch/frv/include/asm/ptrace.h +++ b/arch/frv/include/asm/ptrace.h @@ -65,6 +65,8 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +struct task_struct; + /* * we dedicate GR28 to keeping a pointer to the current exception frame * - gr28 is destroyed on entry to the kernel from userspace @@ -73,11 +75,18 @@ register struct pt_regs *__frame asm("gr28"); #define user_mode(regs) (!((regs)->psr & PSR_S)) #define instruction_pointer(regs) ((regs)->pc) +#define user_stack_pointer(regs) ((regs)->sp) extern unsigned long user_stack(const struct pt_regs *); extern void show_regs(struct pt_regs *); #define profile_pc(regs) ((regs)->pc) -#endif + +#define task_pt_regs(task) ((task)->thread.frame0) + +#define arch_has_single_step() (1) +extern void user_enable_single_step(struct task_struct *); +extern void user_disable_single_step(struct task_struct *); #endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ #endif /* _ASM_PTRACE_H */ diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h new file mode 100644 index 000000000000..70689eb29b98 --- /dev/null +++ b/arch/frv/include/asm/syscall.h @@ -0,0 +1,123 @@ +/* syscall parameter access functions + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H + +#include <linux/err.h> +#include <asm/ptrace.h> + +/* + * Get the system call number or -1 + */ +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->syscallno; +} + +/* + * Restore the clobbered GR8 register + * (1st syscall arg was overwritten with syscall return or error) + */ +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->gr8 = regs->orig_gr8; +} + +/* + * See if the syscall return value is an error, returning it if it is and 0 if + * not + */ +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0; +} + +/* + * Get the syscall return value + */ +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->gr8; +} + +/* + * Set the syscall return value + */ +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + if (error) + regs->gr8 = -error; + else + regs->gr8 = val; +} + +/* + * Retrieve the system call arguments + */ +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + /* + * Do this simply for now. If we need to start supporting + * fetching arguments from arbitrary indices, this will need some + * extra logic. Presently there are no in-tree users that depend + * on this behaviour. + */ + BUG_ON(i); + + /* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */ + switch (n) { + case 6: args[5] = regs->gr13; + case 5: args[4] = regs->gr12; + case 4: args[3] = regs->gr11; + case 3: args[2] = regs->gr10; + case 2: args[1] = regs->gr9; + case 1: args[0] = regs->gr8; + break; + default: + BUG(); + } +} + +/* + * Alter the system call arguments + */ +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + /* Same note as above applies */ + BUG_ON(i); + + switch (n) { + case 6: regs->gr13 = args[5]; + case 5: regs->gr12 = args[4]; + case 4: regs->gr11 = args[3]; + case 3: regs->gr10 = args[2]; + case 2: regs->gr9 = args[1]; + case 1: regs->gr8 = args[0]; + break; + default: + BUG(); + } +} + +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index bb53ab753ffb..e8a5ed7be021 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -109,20 +109,20 @@ register struct thread_info *__current_thread_info asm("gr15"); * - other flags in MSW */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ -#define TIF_IRET 4 /* return with iret */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* OOM killer killed process */ #define TIF_FREEZE 18 /* freezing for suspend */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_IRET (1 << TIF_IRET) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_FREEZE (1 << TIF_FREEZE) diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 1da523b3298e..356e0e327a89 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -886,7 +886,6 @@ system_call: bnc icc0,#0,__syscall_badsys ldi @(gr15,#TI_FLAGS),gr4 - ori gr4,#_TIF_SYSCALL_TRACE,gr4 andicc gr4,#_TIF_SYSCALL_TRACE,gr0,icc0 bne icc0,#0,__syscall_trace_entry @@ -1150,11 +1149,10 @@ __entry_work_notifysig: # perform syscall entry tracing __syscall_trace_entry: LEDS 0x6320 - setlos.p #0,gr8 - call do_syscall_trace + call syscall_trace_entry - ldi @(gr28,#REG_SYSCALLNO),gr7 - lddi @(gr28,#REG_GR(8)) ,gr8 + lddi.p @(gr28,#REG_GR(8)) ,gr8 + ori gr8,#0,gr7 ; syscall_trace_entry() returned new syscallno lddi @(gr28,#REG_GR(10)),gr10 lddi.p @(gr28,#REG_GR(12)),gr12 @@ -1169,11 +1167,10 @@ __syscall_exit_work: beq icc0,#1,__entry_work_pending movsg psr,gr23 - andi gr23,#~PSR_PIL,gr23 ; could let do_syscall_trace() call schedule() + andi gr23,#~PSR_PIL,gr23 ; could let syscall_trace_exit() call schedule() movgs gr23,psr - setlos.p #1,gr8 - call do_syscall_trace + call syscall_trace_exit bra __entry_resume_userspace __syscall_badsys: diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index 5e7d401d21e7..60eeed3694c0 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -19,6 +19,9 @@ #include <linux/user.h> #include <linux/security.h> #include <linux/signal.h> +#include <linux/regset.h> +#include <linux/elf.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -33,6 +36,169 @@ */ /* + * retrieve the contents of FRV userspace general registers + */ +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct user_int_regs *iregs = &target->thread.user->i; + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + iregs, 0, sizeof(*iregs)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(*iregs), -1); +} + +/* + * update the contents of the FRV userspace general registers + */ +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_int_regs *iregs = &target->thread.user->i; + unsigned int offs_gr0, offs_gr1; + int ret; + + /* not allowed to set PSR or __status */ + if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) && + pos + count > offsetof(struct user_int_regs, psr)) + return -EIO; + + if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) && + pos + count > offsetof(struct user_int_regs, __status)) + return -EIO; + + /* set the control regs */ + offs_gr0 = offsetof(struct user_int_regs, gr[0]); + offs_gr1 = offsetof(struct user_int_regs, gr[1]); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + iregs, 0, offs_gr0); + if (ret < 0) + return ret; + + /* skip GR0/TBR */ + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + offs_gr0, offs_gr1); + if (ret < 0) + return ret; + + /* set the general regs */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &iregs->gr[1], offs_gr1, sizeof(*iregs)); + if (ret < 0) + return ret; + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + sizeof(*iregs), -1); +} + +/* + * retrieve the contents of FRV userspace FP/Media registers + */ +static int fpmregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct user_fpmedia_regs *fpregs = &target->thread.user->f; + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + fpregs, 0, sizeof(*fpregs)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(*fpregs), -1); +} + +/* + * update the contents of the FRV userspace FP/Media registers + */ +static int fpmregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fpmedia_regs *fpregs = &target->thread.user->f; + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, 0, sizeof(*fpregs)); + if (ret < 0) + return ret; + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + sizeof(*fpregs), -1); +} + +/* + * determine if the FP/Media registers have actually been used + */ +static int fpmregs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return tsk_used_math(target) ? regset->n : 0; +} + +/* + * Define the register sets available on the FRV under Linux + */ +enum frv_regset { + REGSET_GENERAL, + REGSET_FPMEDIA, +}; + +static const struct user_regset frv_regsets[] = { + /* + * General register format is: + * PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_G8 + * GNER0-1, IACC0, TBR, GR1-63 + */ + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(long), + .align = sizeof(long), + .get = genregs_get, + .set = genregs_set, + }, + /* + * FPU/Media register format is: + * FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR + */ + [REGSET_FPMEDIA] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fpmedia_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = fpmregs_get, + .set = fpmregs_set, + .active = fpmregs_active, + }, +}; + +static const struct user_regset_view user_frv_native_view = { + .name = "frv", + .e_machine = EM_FRV, + .regsets = frv_regsets, + .n = ARRAY_SIZE(frv_regsets), +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_frv_native_view; +} + +/* * Get contents of register REGNO in task TASK. */ static inline long get_reg(struct task_struct *task, int regno) @@ -69,40 +235,23 @@ static inline int put_reg(struct task_struct *task, int regno, } /* - * check that an address falls within the bounds of the target process's memory - * mappings - */ -static inline int is_user_addr_valid(struct task_struct *child, - unsigned long start, unsigned long len) -{ -#ifdef CONFIG_MMU - if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start) - return -EIO; - return 0; -#else - struct vm_area_struct *vma; - - vma = find_vma(child->mm, start); - if (vma && start >= vma->vm_start && start + len <= vma->vm_end) - return 0; - - return -EIO; -#endif -} - -/* * Called by kernel/ptrace.c when detaching.. * * Control h/w single stepping */ -void ptrace_disable(struct task_struct *child) +void user_enable_single_step(struct task_struct *child) +{ + child->thread.frame0->__status |= REG__STATUS_STEP; +} + +void user_disable_single_step(struct task_struct *child) { child->thread.frame0->__status &= ~REG__STATUS_STEP; } -void ptrace_enable(struct task_struct *child) +void ptrace_disable(struct task_struct *child) { - child->thread.frame0->__status |= REG__STATUS_STEP; + user_disable_single_step(child); } long arch_ptrace(struct task_struct *child, long request, long addr, long data) @@ -111,15 +260,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) int ret; switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = -EIO; - if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0) - break; - ret = generic_ptrace_peekdata(child, addr, data); - break; - /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { tmp = 0; @@ -163,15 +303,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = -EIO; - if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0) - break; - ret = generic_ptrace_pokedata(child, addr, data); - break; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; if ((addr & 3) || addr < 0) @@ -179,7 +310,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; switch (addr >> 2) { - case 0 ... PT__END-1: + case 0 ... PT__END - 1: ret = put_reg(child, addr >> 2, data); break; @@ -189,95 +320,29 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } break; - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - ptrace_disable(child); - wake_up_process(child); - ret = 0; - break; - - /* make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - ptrace_disable(child); - wake_up_process(child); - break; - - case PTRACE_SINGLESTEP: /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - ptrace_enable(child); - child->exit_code = data; - wake_up_process(child); - ret = 0; - break; - - case PTRACE_DETACH: /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - - case PTRACE_GETREGS: { /* Get all integer regs from the child. */ - int i; - for (i = 0; i < PT__GPEND; i++) { - tmp = get_reg(child, i); - if (put_user(tmp, (unsigned long *) data)) { - ret = -EFAULT; - break; - } - data += sizeof(long); - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all integer regs in the child. */ - int i; - for (i = 0; i < PT__GPEND; i++) { - if (get_user(tmp, (unsigned long *) data)) { - ret = -EFAULT; - break; - } - put_reg(child, i, tmp); - data += sizeof(long); - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */ - ret = 0; - if (copy_to_user((void *) data, - &child->thread.user->f, - sizeof(child->thread.user->f))) - ret = -EFAULT; - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */ - ret = 0; - if (copy_from_user(&child->thread.user->f, - (void *) data, - sizeof(child->thread.user->f))) - ret = -EFAULT; - break; - } + case PTRACE_GETREGS: /* Get all integer regs from the child. */ + return copy_regset_to_user(child, &user_frv_native_view, + REGSET_GENERAL, + 0, sizeof(child->thread.user->i), + (void __user *)data); + + case PTRACE_SETREGS: /* Set all integer regs in the child. */ + return copy_regset_from_user(child, &user_frv_native_view, + REGSET_GENERAL, + 0, sizeof(child->thread.user->i), + (const void __user *)data); + + case PTRACE_GETFPREGS: /* Get the child FP/Media state. */ + return copy_regset_to_user(child, &user_frv_native_view, + REGSET_FPMEDIA, + 0, sizeof(child->thread.user->f), + (void __user *)data); + + case PTRACE_SETFPREGS: /* Set the child FP/Media state. */ + return copy_regset_from_user(child, &user_frv_native_view, + REGSET_FPMEDIA, + 0, sizeof(child->thread.user->f), + (const void __user *)data); case PTRACE_GETFDPIC: tmp = 0; @@ -300,414 +365,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; default: - ret = -EIO; + ret = ptrace_request(child, request, addr, data); break; } return ret; } -int __nongprelbss kstrace; - -static const struct { - const char *name; - unsigned argmask; -} __syscall_name_table[NR_syscalls] = { - [0] = { "restart_syscall" }, - [1] = { "exit", 0x000001 }, - [2] = { "fork", 0xffffff }, - [3] = { "read", 0x000141 }, - [4] = { "write", 0x000141 }, - [5] = { "open", 0x000235 }, - [6] = { "close", 0x000001 }, - [7] = { "waitpid", 0x000141 }, - [8] = { "creat", 0x000025 }, - [9] = { "link", 0x000055 }, - [10] = { "unlink", 0x000005 }, - [11] = { "execve", 0x000445 }, - [12] = { "chdir", 0x000005 }, - [13] = { "time", 0x000004 }, - [14] = { "mknod", 0x000325 }, - [15] = { "chmod", 0x000025 }, - [16] = { "lchown", 0x000025 }, - [17] = { "break" }, - [18] = { "oldstat", 0x000045 }, - [19] = { "lseek", 0x000131 }, - [20] = { "getpid", 0xffffff }, - [21] = { "mount", 0x043555 }, - [22] = { "umount", 0x000005 }, - [23] = { "setuid", 0x000001 }, - [24] = { "getuid", 0xffffff }, - [25] = { "stime", 0x000004 }, - [26] = { "ptrace", 0x004413 }, - [27] = { "alarm", 0x000001 }, - [28] = { "oldfstat", 0x000041 }, - [29] = { "pause", 0xffffff }, - [30] = { "utime", 0x000045 }, - [31] = { "stty" }, - [32] = { "gtty" }, - [33] = { "access", 0x000025 }, - [34] = { "nice", 0x000001 }, - [35] = { "ftime" }, - [36] = { "sync", 0xffffff }, - [37] = { "kill", 0x000011 }, - [38] = { "rename", 0x000055 }, - [39] = { "mkdir", 0x000025 }, - [40] = { "rmdir", 0x000005 }, - [41] = { "dup", 0x000001 }, - [42] = { "pipe", 0x000004 }, - [43] = { "times", 0x000004 }, - [44] = { "prof" }, - [45] = { "brk", 0x000004 }, - [46] = { "setgid", 0x000001 }, - [47] = { "getgid", 0xffffff }, - [48] = { "signal", 0x000041 }, - [49] = { "geteuid", 0xffffff }, - [50] = { "getegid", 0xffffff }, - [51] = { "acct", 0x000005 }, - [52] = { "umount2", 0x000035 }, - [53] = { "lock" }, - [54] = { "ioctl", 0x000331 }, - [55] = { "fcntl", 0x000331 }, - [56] = { "mpx" }, - [57] = { "setpgid", 0x000011 }, - [58] = { "ulimit" }, - [60] = { "umask", 0x000002 }, - [61] = { "chroot", 0x000005 }, - [62] = { "ustat", 0x000043 }, - [63] = { "dup2", 0x000011 }, - [64] = { "getppid", 0xffffff }, - [65] = { "getpgrp", 0xffffff }, - [66] = { "setsid", 0xffffff }, - [67] = { "sigaction" }, - [68] = { "sgetmask" }, - [69] = { "ssetmask" }, - [70] = { "setreuid" }, - [71] = { "setregid" }, - [72] = { "sigsuspend" }, - [73] = { "sigpending" }, - [74] = { "sethostname" }, - [75] = { "setrlimit" }, - [76] = { "getrlimit" }, - [77] = { "getrusage" }, - [78] = { "gettimeofday" }, - [79] = { "settimeofday" }, - [80] = { "getgroups" }, - [81] = { "setgroups" }, - [82] = { "select" }, - [83] = { "symlink" }, - [84] = { "oldlstat" }, - [85] = { "readlink" }, - [86] = { "uselib" }, - [87] = { "swapon" }, - [88] = { "reboot" }, - [89] = { "readdir" }, - [91] = { "munmap", 0x000034 }, - [92] = { "truncate" }, - [93] = { "ftruncate" }, - [94] = { "fchmod" }, - [95] = { "fchown" }, - [96] = { "getpriority" }, - [97] = { "setpriority" }, - [99] = { "statfs" }, - [100] = { "fstatfs" }, - [102] = { "socketcall" }, - [103] = { "syslog" }, - [104] = { "setitimer" }, - [105] = { "getitimer" }, - [106] = { "stat" }, - [107] = { "lstat" }, - [108] = { "fstat" }, - [111] = { "vhangup" }, - [114] = { "wait4" }, - [115] = { "swapoff" }, - [116] = { "sysinfo" }, - [117] = { "ipc" }, - [118] = { "fsync" }, - [119] = { "sigreturn" }, - [120] = { "clone" }, - [121] = { "setdomainname" }, - [122] = { "uname" }, - [123] = { "modify_ldt" }, - [123] = { "cacheflush" }, - [124] = { "adjtimex" }, - [125] = { "mprotect" }, - [126] = { "sigprocmask" }, - [127] = { "create_module" }, - [128] = { "init_module" }, - [129] = { "delete_module" }, - [130] = { "get_kernel_syms" }, - [131] = { "quotactl" }, - [132] = { "getpgid" }, - [133] = { "fchdir" }, - [134] = { "bdflush" }, - [135] = { "sysfs" }, - [136] = { "personality" }, - [137] = { "afs_syscall" }, - [138] = { "setfsuid" }, - [139] = { "setfsgid" }, - [140] = { "_llseek", 0x014331 }, - [141] = { "getdents" }, - [142] = { "_newselect", 0x000141 }, - [143] = { "flock" }, - [144] = { "msync" }, - [145] = { "readv" }, - [146] = { "writev" }, - [147] = { "getsid", 0x000001 }, - [148] = { "fdatasync", 0x000001 }, - [149] = { "_sysctl", 0x000004 }, - [150] = { "mlock" }, - [151] = { "munlock" }, - [152] = { "mlockall" }, - [153] = { "munlockall" }, - [154] = { "sched_setparam" }, - [155] = { "sched_getparam" }, - [156] = { "sched_setscheduler" }, - [157] = { "sched_getscheduler" }, - [158] = { "sched_yield" }, - [159] = { "sched_get_priority_max" }, - [160] = { "sched_get_priority_min" }, - [161] = { "sched_rr_get_interval" }, - [162] = { "nanosleep", 0x000044 }, - [163] = { "mremap" }, - [164] = { "setresuid" }, - [165] = { "getresuid" }, - [166] = { "vm86" }, - [167] = { "query_module" }, - [168] = { "poll" }, - [169] = { "nfsservctl" }, - [170] = { "setresgid" }, - [171] = { "getresgid" }, - [172] = { "prctl", 0x333331 }, - [173] = { "rt_sigreturn", 0xffffff }, - [174] = { "rt_sigaction", 0x001441 }, - [175] = { "rt_sigprocmask", 0x001441 }, - [176] = { "rt_sigpending", 0x000014 }, - [177] = { "rt_sigtimedwait", 0x001444 }, - [178] = { "rt_sigqueueinfo", 0x000411 }, - [179] = { "rt_sigsuspend", 0x000014 }, - [180] = { "pread", 0x003341 }, - [181] = { "pwrite", 0x003341 }, - [182] = { "chown", 0x000115 }, - [183] = { "getcwd" }, - [184] = { "capget" }, - [185] = { "capset" }, - [186] = { "sigaltstack" }, - [187] = { "sendfile" }, - [188] = { "getpmsg" }, - [189] = { "putpmsg" }, - [190] = { "vfork", 0xffffff }, - [191] = { "ugetrlimit" }, - [192] = { "mmap2", 0x313314 }, - [193] = { "truncate64" }, - [194] = { "ftruncate64" }, - [195] = { "stat64", 0x000045 }, - [196] = { "lstat64", 0x000045 }, - [197] = { "fstat64", 0x000041 }, - [198] = { "lchown32" }, - [199] = { "getuid32", 0xffffff }, - [200] = { "getgid32", 0xffffff }, - [201] = { "geteuid32", 0xffffff }, - [202] = { "getegid32", 0xffffff }, - [203] = { "setreuid32" }, - [204] = { "setregid32" }, - [205] = { "getgroups32" }, - [206] = { "setgroups32" }, - [207] = { "fchown32" }, - [208] = { "setresuid32" }, - [209] = { "getresuid32" }, - [210] = { "setresgid32" }, - [211] = { "getresgid32" }, - [212] = { "chown32" }, - [213] = { "setuid32" }, - [214] = { "setgid32" }, - [215] = { "setfsuid32" }, - [216] = { "setfsgid32" }, - [217] = { "pivot_root" }, - [218] = { "mincore" }, - [219] = { "madvise" }, - [220] = { "getdents64" }, - [221] = { "fcntl64" }, - [223] = { "security" }, - [224] = { "gettid" }, - [225] = { "readahead" }, - [226] = { "setxattr" }, - [227] = { "lsetxattr" }, - [228] = { "fsetxattr" }, - [229] = { "getxattr" }, - [230] = { "lgetxattr" }, - [231] = { "fgetxattr" }, - [232] = { "listxattr" }, - [233] = { "llistxattr" }, - [234] = { "flistxattr" }, - [235] = { "removexattr" }, - [236] = { "lremovexattr" }, - [237] = { "fremovexattr" }, - [238] = { "tkill" }, - [239] = { "sendfile64" }, - [240] = { "futex" }, - [241] = { "sched_setaffinity" }, - [242] = { "sched_getaffinity" }, - [243] = { "set_thread_area" }, - [244] = { "get_thread_area" }, - [245] = { "io_setup" }, - [246] = { "io_destroy" }, - [247] = { "io_getevents" }, - [248] = { "io_submit" }, - [249] = { "io_cancel" }, - [250] = { "fadvise64" }, - [252] = { "exit_group", 0x000001 }, - [253] = { "lookup_dcookie" }, - [254] = { "epoll_create" }, - [255] = { "epoll_ctl" }, - [256] = { "epoll_wait" }, - [257] = { "remap_file_pages" }, - [258] = { "set_tid_address" }, - [259] = { "timer_create" }, - [260] = { "timer_settime" }, - [261] = { "timer_gettime" }, - [262] = { "timer_getoverrun" }, - [263] = { "timer_delete" }, - [264] = { "clock_settime" }, - [265] = { "clock_gettime" }, - [266] = { "clock_getres" }, - [267] = { "clock_nanosleep" }, - [268] = { "statfs64" }, - [269] = { "fstatfs64" }, - [270] = { "tgkill" }, - [271] = { "utimes" }, - [272] = { "fadvise64_64" }, - [273] = { "vserver" }, - [274] = { "mbind" }, - [275] = { "get_mempolicy" }, - [276] = { "set_mempolicy" }, - [277] = { "mq_open" }, - [278] = { "mq_unlink" }, - [279] = { "mq_timedsend" }, - [280] = { "mq_timedreceive" }, - [281] = { "mq_notify" }, - [282] = { "mq_getsetattr" }, - [283] = { "sys_kexec_load" }, -}; - -asmlinkage void do_syscall_trace(int leaving) +/* + * handle tracing of system call entry + * - return the revised system call number or ULONG_MAX to cause ENOSYS + */ +asmlinkage unsigned long syscall_trace_entry(void) { -#if 0 - unsigned long *argp; - const char *name; - unsigned argmask; - char buffer[16]; - - if (!kstrace) - return; - - if (!current->mm) - return; - - if (__frame->gr7 == __NR_close) - return; - -#if 0 - if (__frame->gr7 != __NR_mmap2 && - __frame->gr7 != __NR_vfork && - __frame->gr7 != __NR_execve && - __frame->gr7 != __NR_exit) - return; -#endif - - argmask = 0; - name = NULL; - if (__frame->gr7 < NR_syscalls) { - name = __syscall_name_table[__frame->gr7].name; - argmask = __syscall_name_table[__frame->gr7].argmask; - } - if (!name) { - sprintf(buffer, "sys_%lx", __frame->gr7); - name = buffer; - } - - if (!leaving) { - if (!argmask) { - printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n", - current->pid, - name, - __frame->gr8, - __frame->gr9, - __frame->gr10, - __frame->gr11, - __frame->gr12, - __frame->gr13); - } - else if (argmask == 0xffffff) { - printk(KERN_CRIT "[%d] %s()\n", - current->pid, - name); - } - else { - printk(KERN_CRIT "[%d] %s(", - current->pid, - name); - - argp = &__frame->gr8; - - do { - switch (argmask & 0xf) { - case 1: - printk("%ld", (long) *argp); - break; - case 2: - printk("%lo", *argp); - break; - case 3: - printk("%lx", *argp); - break; - case 4: - printk("%p", (void *) *argp); - break; - case 5: - printk("\"%s\"", (char *) *argp); - break; - } - - argp++; - argmask >>= 4; - if (argmask) - printk(","); - - } while (argmask); - - printk(")\n"); - } - } - else { - if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096) - printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8); - else - printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8); + __frame->__status |= REG__STATUS_SYSC_ENTRY; + if (tracehook_report_syscall_entry(__frame)) { + /* tracing decided this syscall should not happen, so + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in + * __frame->syscallno + */ + return ULONG_MAX; } - return; -#endif - - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; - - if (!(current->ptrace & PT_PTRACED)) - return; - /* we need to indicate entry or exit to strace */ - if (leaving) - __frame->__status |= REG__STATUS_SYSC_EXIT; - else - __frame->__status |= REG__STATUS_SYSC_ENTRY; - - ptrace_notify(SIGTRAP); + return __frame->syscallno; +} - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } +/* + * handle tracing of system call exit + */ +asmlinkage void syscall_trace_exit(void) +{ + __frame->__status |= REG__STATUS_SYSC_EXIT; + tracehook_report_syscall_exit(__frame, 0); } diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 3bdb368292a8..4a7a62c6e783 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -21,6 +21,7 @@ #include <linux/unistd.h> #include <linux/personality.h> #include <linux/freezer.h> +#include <linux/tracehook.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -516,6 +517,9 @@ static void do_signal(void) * clear the TIF_RESTORE_SIGMASK flag */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); + + tracehook_signal_handler(signr, &info, &ka, __frame, + test_thread_flag(TIF_SINGLESTEP)); } return; @@ -564,4 +568,10 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags) if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(); + /* deal with notification on about to resume userspace execution */ + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(__frame); + } + } /* end do_notify_resume() */ diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c index 9fb771a20df3..374f88d6cc00 100644 --- a/arch/frv/kernel/uaccess.c +++ b/arch/frv/kernel/uaccess.c @@ -23,8 +23,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count) char *p, ch; long err = -EFAULT; - if (count < 0) - BUG(); + BUG_ON(count < 0); p = dst; @@ -76,8 +75,7 @@ long strnlen_user(const char __user *src, long count) long err = 0; char ch; - if (count < 0) - BUG(); + BUG_ON(count < 0); #ifndef CONFIG_MMU if ((unsigned long) src < memory_start) diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c index 52ff9aec799d..4e1ba0b15443 100644 --- a/arch/frv/mb93090-mb00/pci-dma-nommu.c +++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c @@ -116,8 +116,7 @@ EXPORT_SYMBOL(dma_free_coherent); dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size); @@ -151,8 +150,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, frv_cache_wback_inv(sg_dma_address(&sg[i]), sg_dma_address(&sg[i]) + sg_dma_len(&sg[i])); - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); return nents; } diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c index 3ddedebc4eb3..45954f0813dc 100644 --- a/arch/frv/mb93090-mb00/pci-dma.c +++ b/arch/frv/mb93090-mb00/pci-dma.c @@ -48,8 +48,7 @@ EXPORT_SYMBOL(dma_free_coherent); dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size); @@ -81,8 +80,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, void *vaddr; int i; - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); dampr2 = __get_DAMPR(2); diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 355926730e8d..89faacad5d17 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration" config MN10300 def_bool y select HAVE_OPROFILE + select HAVE_ARCH_TRACEHOOK config AM33 def_bool y diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h index bf09f8bb392e..49105462e6fc 100644 --- a/arch/mn10300/include/asm/elf.h +++ b/arch/mn10300/include/asm/elf.h @@ -34,7 +34,7 @@ */ typedef unsigned long elf_greg_t; -#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t)) +#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; #define ELF_NFPREG 32 @@ -76,6 +76,7 @@ do { \ } while (0) #define USE_ELF_CORE_DUMP +#define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 /* diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 73239271873d..f7d4b0d285e8 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -143,13 +143,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); unsigned long get_wchan(struct task_struct *p); -#define task_pt_regs(task) \ -({ \ - struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \ - __regs__ - 1; \ -}) - +#define task_pt_regs(task) ((task)->thread.uregs) #define KSTK_EIP(task) (task_pt_regs(task)->pc) #define KSTK_ESP(task) (task_pt_regs(task)->sp) diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h index 7b06cc623d8b..921942ed1b03 100644 --- a/arch/mn10300/include/asm/ptrace.h +++ b/arch/mn10300/include/asm/ptrace.h @@ -91,9 +91,17 @@ extern struct pt_regs *__frame; /* current frame pointer */ #if defined(__KERNEL__) #if !defined(__ASSEMBLY__) +struct task_struct; + #define user_mode(regs) (((regs)->epsw & EPSW_nSL) == EPSW_nSL) #define instruction_pointer(regs) ((regs)->pc) +#define user_stack_pointer(regs) ((regs)->sp) extern void show_regs(struct pt_regs *); + +#define arch_has_single_step() (1) +extern void user_enable_single_step(struct task_struct *); +extern void user_disable_single_step(struct task_struct *); + #endif /* !__ASSEMBLY */ #define profile_pc(regs) ((regs)->pc) diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index 3dc3e462f92a..7408a27199f3 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -76,7 +76,7 @@ ENTRY(system_call) cmp nr_syscalls,d0 bcc syscall_badsys btst _TIF_SYSCALL_TRACE,(TI_flags,a2) - bne syscall_trace_entry + bne syscall_entry_trace syscall_call: add d0,d0,a1 add a1,a1 @@ -104,11 +104,10 @@ restore_all: syscall_exit_work: btst _TIF_SYSCALL_TRACE,d2 beq work_pending - __sti # could let do_syscall_trace() call + __sti # could let syscall_trace_exit() call # schedule() instead mov fp,d0 - mov 1,d1 - call do_syscall_trace[],0 # do_syscall_trace(regs,entryexit) + call syscall_trace_exit[],0 # do_syscall_trace(regs) jmp resume_userspace ALIGN @@ -138,13 +137,11 @@ work_notifysig: jmp resume_userspace # perform syscall entry tracing -syscall_trace_entry: +syscall_entry_trace: mov -ENOSYS,d0 mov d0,(REG_D0,fp) mov fp,d0 - clr d1 - call do_syscall_trace[],0 - mov (REG_ORIG_D0,fp),d0 + call syscall_trace_entry[],0 # returns the syscall number to actually use mov (REG_D1,fp),d1 cmp nr_syscalls,d0 bcs syscall_call diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index d6d6cdc75c52..e143339ad28e 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -17,6 +17,9 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/regset.h> +#include <linux/elf.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -64,12 +67,6 @@ static inline int get_stack_long(struct task_struct *task, int offset) ((unsigned long) task->thread.uregs + offset); } -/* - * this routine will put a word on the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our - * data space. - */ static inline int put_stack_long(struct task_struct *task, int offset, unsigned long data) { @@ -80,94 +77,233 @@ int put_stack_long(struct task_struct *task, int offset, unsigned long data) return 0; } -static inline unsigned long get_fpregs(struct fpu_state_struct *buf, - struct task_struct *tsk) +/* + * retrieve the contents of MN10300 userspace general registers + */ +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) { - return __copy_to_user(buf, &tsk->thread.fpu_state, - sizeof(struct fpu_state_struct)); + const struct pt_regs *regs = task_pt_regs(target); + int ret; + + /* we need to skip regs->next */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs, 0, PT_ORIG_D0 * sizeof(long)); + if (ret < 0) + return ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->orig_d0, PT_ORIG_D0 * sizeof(long), + NR_PTREGS * sizeof(long)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + NR_PTREGS * sizeof(long), -1); } -static inline unsigned long set_fpregs(struct task_struct *tsk, - struct fpu_state_struct *buf) +/* + * update the contents of the MN10300 userspace general registers + */ +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - return __copy_from_user(&tsk->thread.fpu_state, buf, - sizeof(struct fpu_state_struct)); + struct pt_regs *regs = task_pt_regs(target); + unsigned long tmp; + int ret; + + /* we need to skip regs->next */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs, 0, PT_ORIG_D0 * sizeof(long)); + if (ret < 0) + return ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->orig_d0, PT_ORIG_D0 * sizeof(long), + PT_EPSW * sizeof(long)); + if (ret < 0) + return ret; + + /* we need to mask off changes to EPSW */ + tmp = regs->epsw; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &tmp, PT_EPSW * sizeof(long), + PT_PC * sizeof(long)); + tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z; + tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | + EPSW_FLAG_Z); + regs->epsw = tmp; + + if (ret < 0) + return ret; + + /* and finally load the PC */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->pc, PT_PC * sizeof(long), + NR_PTREGS * sizeof(long)); + + if (ret < 0) + return ret; + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + NR_PTREGS * sizeof(long), -1); } -static inline void fpsave_init(struct task_struct *task) +/* + * retrieve the contents of MN10300 userspace FPU registers + */ +static int fpuregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) { - memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct)); + const struct fpu_state_struct *fpregs = &target->thread.fpu_state; + int ret; + + unlazy_fpu(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + fpregs, 0, sizeof(*fpregs)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(*fpregs), -1); } /* - * make sure the single step bit is not set + * update the contents of the MN10300 userspace FPU registers */ -void ptrace_disable(struct task_struct *child) +static int fpuregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu_state_struct fpu_state = target->thread.fpu_state; + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu_state, 0, sizeof(fpu_state)); + if (ret < 0) + return ret; + + fpu_kill_state(target); + target->thread.fpu_state = fpu_state; + set_using_fpu(target); + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + sizeof(fpu_state), -1); +} + +/* + * determine if the FPU registers have actually been used + */ +static int fpuregs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return is_using_fpu(target) ? regset->n : 0; +} + +/* + * Define the register sets available on the MN10300 under Linux + */ +enum mn10300_regset { + REGSET_GENERAL, + REGSET_FPU, +}; + +static const struct user_regset mn10300_regsets[] = { + /* + * General register format is: + * A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ + * E1, E0, E7...E2, SP, LAR, LIR, MDR + * A1, A0, D1, D0, ORIG_D0, EPSW, PC + */ + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(long), + .align = sizeof(long), + .get = genregs_get, + .set = genregs_set, + }, + /* + * FPU register format is: + * FS0-31, FPCR + */ + [REGSET_FPU] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct fpu_state_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = fpuregs_get, + .set = fpuregs_set, + .active = fpuregs_active, + }, +}; + +static const struct user_regset_view user_mn10300_native_view = { + .name = "mn10300", + .e_machine = EM_MN10300, + .regsets = mn10300_regsets, + .n = ARRAY_SIZE(mn10300_regsets), +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_mn10300_native_view; +} + +/* + * set the single-step bit + */ +void user_enable_single_step(struct task_struct *child) { #ifndef CONFIG_MN10300_USING_JTAG struct user *dummy = NULL; long tmp; tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw); - tmp &= ~EPSW_T; + tmp |= EPSW_T; put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp); #endif } /* - * set the single step bit + * make sure the single-step bit is not set */ -void ptrace_enable(struct task_struct *child) +void user_disable_single_step(struct task_struct *child) { #ifndef CONFIG_MN10300_USING_JTAG struct user *dummy = NULL; long tmp; tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw); - tmp |= EPSW_T; + tmp &= ~EPSW_T; put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp); #endif } +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +} + /* * handle the arch-specific side of process tracing */ long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct fpu_state_struct fpu_state; - int i, ret; + unsigned long tmp; + int ret; switch (request) { - /* read the word at location addr. */ - case PTRACE_PEEKTEXT: { - unsigned long tmp; - int copied; - - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - break; - ret = put_user(tmp, (unsigned long *) data); - break; - } - - /* read the word at location addr. */ - case PTRACE_PEEKDATA: { - unsigned long tmp; - int copied; - - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - break; - ret = put_user(tmp, (unsigned long *) data); - break; - } - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - + case PTRACE_PEEKUSR: ret = -EIO; if ((addr & 3) || addr < 0 || addr > sizeof(struct user) - 3) @@ -179,17 +315,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ptrace_regid_to_frame[addr]); ret = put_user(tmp, (unsigned long *) data); break; - } - - /* write the word at location addr. */ - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - if (access_process_vm(child, addr, &data, sizeof(data), 1) == - sizeof(data)) - ret = 0; - else - ret = -EIO; - break; /* write the word at location addr in the USER area */ case PTRACE_POKEUSR: @@ -204,132 +329,32 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) data); break; - /* continue and stop at next (return from) syscall */ - case PTRACE_SYSCALL: - /* restart after signal. */ - case PTRACE_CONT: - ret = -EIO; - if ((unsigned long) data > _NSIG) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - ptrace_disable(child); - wake_up_process(child); - ret = 0; - break; - - /* - * make the child exit - * - the best I can do is send it a sigkill - * - perhaps it should be put in the status that it wants to - * exit - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - ptrace_disable(child); - wake_up_process(child); - break; - - case PTRACE_SINGLESTEP: /* set the trap flag. */ -#ifndef CONFIG_MN10300_USING_JTAG - ret = -EIO; - if ((unsigned long) data > _NSIG) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - ptrace_enable(child); - child->exit_code = data; - wake_up_process(child); - ret = 0; -#else - ret = -EINVAL; -#endif - break; - - case PTRACE_DETACH: /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - - /* Get all gp regs from the child. */ - case PTRACE_GETREGS: { - unsigned long tmp; - - if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) { - ret = -EIO; - break; - } - - for (i = 0; i < NR_PTREGS << 2; i += 4) { - tmp = get_stack_long(child, ptrace_regid_to_frame[i]); - __put_user(tmp, (unsigned long *) data); - data += sizeof(tmp); - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - - if (!access_ok(VERIFY_READ, (unsigned long *)data, - sizeof(struct pt_regs))) { - ret = -EIO; - break; - } - - for (i = 0; i < NR_PTREGS << 2; i += 4) { - __get_user(tmp, (unsigned long *) data); - put_stack_long(child, ptrace_regid_to_frame[i], tmp); - data += sizeof(tmp); - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (is_using_fpu(child)) { - unlazy_fpu(child); - fpu_state = child->thread.fpu_state; - } else { - memset(&fpu_state, 0, sizeof(fpu_state)); - } - - ret = -EIO; - if (copy_to_user((void *) data, &fpu_state, - sizeof(fpu_state)) == 0) - ret = 0; - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - ret = -EFAULT; - if (copy_from_user(&fpu_state, (const void *) data, - sizeof(fpu_state)) == 0) { - fpu_kill_state(child); - child->thread.fpu_state = fpu_state; - set_using_fpu(child); - ret = 0; - } - break; - } - - case PTRACE_SETOPTIONS: { - if (data & PTRACE_O_TRACESYSGOOD) - child->ptrace |= PT_TRACESYSGOOD; - else - child->ptrace &= ~PT_TRACESYSGOOD; - ret = 0; - break; - } + case PTRACE_GETREGS: /* Get all integer regs from the child. */ + return copy_regset_to_user(child, &user_mn10300_native_view, + REGSET_GENERAL, + 0, NR_PTREGS * sizeof(long), + (void __user *)data); + + case PTRACE_SETREGS: /* Set all integer regs in the child. */ + return copy_regset_from_user(child, &user_mn10300_native_view, + REGSET_GENERAL, + 0, NR_PTREGS * sizeof(long), + (const void __user *)data); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, &user_mn10300_native_view, + REGSET_FPU, + 0, sizeof(struct fpu_state_struct), + (void __user *)data); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, &user_mn10300_native_view, + REGSET_FPU, + 0, sizeof(struct fpu_state_struct), + (const void __user *)data); default: - ret = -EIO; + ret = ptrace_request(child, request, addr, data); break; } @@ -337,43 +362,26 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } /* - * notification of system call entry/exit - * - triggered by current->work.syscall_trace + * handle tracing of system call entry + * - return the revised system call number or ULONG_MAX to cause ENOSYS */ -asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) +asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs) { -#if 0 - /* just in case... */ - printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n", - current->pid, - regs->orig_d0, - regs->a0, - regs->d1, - regs->a3, - regs->a2, - regs->d0); - return; -#endif - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && - !test_thread_flag(TIF_SINGLESTEP)) - return; - if (!(current->ptrace & PT_PTRACED)) - return; + if (tracehook_report_syscall_entry(regs)) + /* tracing decided this syscall should not happen, so + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in + * regs->orig_d0 + */ + return ULONG_MAX; - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - ptrace_notify(SIGTRAP | - ((current->ptrace & PT_TRACESYSGOOD) && - !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0)); + return regs->orig_d0; +} - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } +/* + * handle tracing of system call exit + */ +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + tracehook_report_syscall_exit(regs, 0); } diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 841ca9955a18..9f7572a0f578 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -23,6 +23,7 @@ #include <linux/tty.h> #include <linux/personality.h> #include <linux/suspend.h> +#include <linux/tracehook.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/uaccess.h> @@ -511,6 +512,9 @@ static void do_signal(struct pt_regs *regs) * clear the TIF_RESTORE_SIGMASK flag */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); + + tracehook_signal_handler(signr, &info, &ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } return; @@ -561,4 +565,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(__frame); + } } diff --git a/arch/mn10300/mm/tlb-mn10300.S b/arch/mn10300/mm/tlb-mn10300.S index 789208094e98..7095147dcb8b 100644 --- a/arch/mn10300/mm/tlb-mn10300.S +++ b/arch/mn10300/mm/tlb-mn10300.S @@ -165,24 +165,6 @@ ENTRY(itlb_aerror) ENTRY(dtlb_aerror) and ~EPSW_NMID,epsw add -4,sp - mov d1,(sp) - - movhu (MMUFCR_DFC),d1 # is it the initial valid write - # to this page? - and MMUFCR_xFC_INITWR,d1 - beq dtlb_pagefault # jump if not - - mov (DPTEL),d1 # set the dirty bit - # (don't replace with BSET!) - or _PAGE_DIRTY,d1 - mov d1,(DPTEL) - mov (sp),d1 - add 4,sp - rti - - ALIGN -dtlb_pagefault: - mov (sp),d1 SAVE_ALL add -4,sp # need to pass three params diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 33fac6bbe1c2..d105f29bb6bb 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -174,6 +174,15 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config X86_DS_SELFTEST + bool "DS selftest" + default y + depends on DEBUG_KERNEL + depends on X86_DS + ---help--- + Perform Debug Store selftests at boot time. + If in doubt, say "N". + config HAVE_MMIOTRACE_SUPPORT def_bool y diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a505202086e8..dcef387ddc36 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -830,4 +830,5 @@ ia32_sys_call_table: .quad sys_inotify_init1 .quad compat_sys_preadv .quad compat_sys_pwritev + .quad compat_sys_rt_tgsigqueueinfo /* 335 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a8f672ba100c..70dac199b093 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 */ #ifndef _ASM_X86_DS_H @@ -83,8 +83,10 @@ enum ds_feature { * The interrupt threshold is independent from the overflow callback * to allow users to use their own overflow interrupt handling mechanism. * - * task: the task to request recording for; - * NULL for per-cpu recording on the current cpu + * The function might sleep. + * + * task: the task to request recording for + * cpu: the cpu to request recording for * base: the base pointer for the (non-pageable) buffer; * size: the size of the provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; @@ -93,19 +95,28 @@ enum ds_feature { * -1 if no interrupt threshold is requested. * flags: a bit-mask of the above flags */ -extern struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); /* * Release BTS or PEBS resources * Suspend and resume BTS or PEBS tracing * + * Must be called with irq's enabled. + * * tracer: the tracer handle returned from ds_request_~() */ extern void ds_release_bts(struct bts_tracer *tracer); @@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); extern void ds_suspend_pebs(struct pebs_tracer *tracer); extern void ds_resume_pebs(struct pebs_tracer *tracer); +/* + * Release BTS or PEBS resources + * Suspend and resume BTS or PEBS tracing + * + * Cpu tracers must call this on the traced cpu. + * Task tracers must call ds_release_~_noirq() for themselves. + * + * May be called with irq's disabled. + * + * Returns 0 if successful; + * -EPERM if the cpu tracer does not trace the current cpu. + * -EPERM if the task tracer does not trace itself. + * + * tracer: the tracer handle returned from ds_request_~() + */ +extern int ds_release_bts_noirq(struct bts_tracer *tracer); +extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); +extern int ds_resume_bts_noirq(struct bts_tracer *tracer); +extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); + /* * The raw DS buffer state as it is used for BTS and PEBS recording. @@ -170,9 +203,9 @@ struct bts_struct { } lbr; /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ struct { - __u64 jiffies; + __u64 clock; pid_t pid; - } timestamp; + } event; } variant; }; @@ -201,8 +234,12 @@ struct bts_trace { struct pebs_trace { struct ds_trace ds; - /* the PEBS reset value */ - unsigned long long reset_value; + /* the number of valid counters in the below array */ + unsigned int counters; + +#define MAX_PEBS_COUNTERS 4 + /* the counter reset value */ + unsigned long long counter_reset[MAX_PEBS_COUNTERS]; }; @@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); * Returns 0 on success; -Eerrno on error * * tracer: the tracer handle returned from ds_request_pebs() + * counter: the index of the counter * value: the new counter reset value */ -extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); +extern int ds_set_pebs_reset(struct pebs_tracer *tracer, + unsigned int counter, u64 value); /* * Initialization @@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); */ extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); -/* - * Task clone/init and cleanup work - */ -extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); -extern void ds_exit_thread(struct task_struct *tsk); - #else /* CONFIG_X86_DS */ struct cpuinfo_x86; static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} static inline void ds_switch_to(struct task_struct *prev, struct task_struct *next) {} -static inline void ds_copy_thread(struct task_struct *tsk, - struct task_struct *father) {} -static inline void ds_exit_thread(struct task_struct *tsk) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 87ede2f31bc7..c7768269b1cf 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -462,14 +462,8 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; -#ifdef CONFIG_X86_DS -/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + /* Debug Store context; see asm/ds.h */ struct ds_context *ds_ctx; -#endif /* CONFIG_X86_DS */ -#ifdef CONFIG_X86_PTRACE_BTS -/* the signal to send on a bts buffer overflow */ - unsigned int bts_ovfl_signal; -#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) @@ -797,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void) return debugctlmsr; } +static inline unsigned long get_debugctlmsr_on_cpu(int cpu) +{ + u64 debugctlmsr = 0; + u32 val1, val2; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); + debugctlmsr = val1 | ((u64)val2 << 32); + + return debugctlmsr; +} + static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR @@ -806,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +static inline void update_debugctlmsr_on_cpu(int cpu, + unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, + (u32)((u64)debugctlmsr), + (u32)((u64)debugctlmsr >> 32)); +} + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 624f133943ed..0f0d908349aa 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -extern void x86_ptrace_untrace(struct task_struct *); -extern void x86_ptrace_fork(struct task_struct *child, - unsigned long clone_flags); +#ifdef CONFIG_X86_PTRACE_BTS +extern void ptrace_bts_untrace(struct task_struct *tsk); -#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) -#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) +#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) +#endif /* CONFIG_X86_PTRACE_BTS */ #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index f72956331c49..c4ee8056baca 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h @@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios, SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); + get_user(termios->c_line, &termio->c_line); return copy_from_user(termios->c_cc, termio->c_cc, NCC); } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 16a5c84b0329..a5ecc9c33e92 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -17,7 +17,7 @@ static inline void __native_flush_tlb(void) { - write_cr3(read_cr3()); + native_write_cr3(native_read_cr3()); } static inline void __native_flush_tlb_global(void) @@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - cr4 = read_cr4(); + cr4 = native_read_cr4(); /* clear PGE */ - write_cr4(cr4 & ~X86_CR4_PGE); + native_write_cr4(cr4 & ~X86_CR4_PGE); /* write old PGE again and flush TLBs */ - write_cr4(cr4); + native_write_cr4(cr4); raw_local_irq_restore(flags); } diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74cf8dc..708dae61262d 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -340,6 +340,7 @@ #define __NR_inotify_init1 332 #define __NR_preadv 333 #define __NR_pwritev 334 +#define __NR_rt_tgsigqueueinfo 335 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f81829462325..4e2b05404400 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) __SYSCALL(__NR_preadv, sys_preadv) #define __NR_pwritev 296 __SYSCALL(__NR_pwritev, sys_pwritev) +#define __NR_rt_tgsigqueueinfo 297 +__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #ifndef __NO_STUBS diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 235f5927bb97..4f78bd682125 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -44,6 +44,7 @@ obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o obj-$(CONFIG_X86_DS) += ds.o +obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 87b67e3a765a..48bfe1386038 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -19,45 +19,61 @@ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 */ - -#include <asm/ds.h> - -#include <linux/errno.h> +#include <linux/kernel.h> #include <linux/string.h> -#include <linux/slab.h> +#include <linux/errno.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/mm.h> -#include <linux/kernel.h> +#include <linux/trace_clock.h> + +#include <asm/ds.h> +#include "ds_selftest.h" /* - * The configuration for a particular DS hardware implementation. + * The configuration for a particular DS hardware implementation: */ struct ds_configuration { - /* the name of the configuration */ - const char *name; - /* the size of one pointer-typed field in the DS structure and - in the BTS and PEBS buffers in bytes; - this covers the first 8 DS fields related to buffer management. */ - unsigned char sizeof_field; - /* the size of a BTS/PEBS record in bytes */ - unsigned char sizeof_rec[2]; - /* a series of bit-masks to control various features indexed - * by enum ds_feature */ - unsigned long ctl[dsf_ctl_max]; + /* The name of the configuration: */ + const char *name; + + /* The size of pointer-typed fields in DS, BTS, and PEBS: */ + unsigned char sizeof_ptr_field; + + /* The size of a BTS/PEBS record in bytes: */ + unsigned char sizeof_rec[2]; + + /* The number of pebs counter reset values in the DS structure. */ + unsigned char nr_counter_reset; + + /* Control bit-masks indexed by enum ds_feature: */ + unsigned long ctl[dsf_ctl_max]; }; -static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); +static struct ds_configuration ds_cfg __read_mostly; + + +/* Maximal size of a DS configuration: */ +#define MAX_SIZEOF_DS 0x80 -#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) +/* Maximal size of a BTS record: */ +#define MAX_SIZEOF_BTS (3 * 8) -#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ -#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ -#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ +/* BTS and PEBS buffer alignment: */ +#define DS_ALIGNMENT (1 << 3) -#define BTS_CONTROL \ - (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ - ds_cfg.ctl[dsf_bts_overflow]) +/* Number of buffer pointers in DS: */ +#define NUM_DS_PTR_FIELDS 8 +/* Size of a pebs reset value in DS: */ +#define PEBS_RESET_FIELD_SIZE 8 + +/* Mask of control bits in the DS MSR register: */ +#define BTS_CONTROL \ + ( ds_cfg.ctl[dsf_bts] | \ + ds_cfg.ctl[dsf_bts_kernel] | \ + ds_cfg.ctl[dsf_bts_user] | \ + ds_cfg.ctl[dsf_bts_overflow] ) /* * A BTS or PEBS tracer. @@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); * to identify tracers. */ struct ds_tracer { - /* the DS context (partially) owned by this tracer */ - struct ds_context *context; - /* the buffer provided on ds_request() and its size in bytes */ - void *buffer; - size_t size; + /* The DS context (partially) owned by this tracer. */ + struct ds_context *context; + /* The buffer provided on ds_request() and its size in bytes. */ + void *buffer; + size_t size; }; struct bts_tracer { - /* the common DS part */ - struct ds_tracer ds; - /* the trace including the DS configuration */ - struct bts_trace trace; - /* buffer overflow notification function */ - bts_ovfl_callback_t ovfl; + /* The common DS part: */ + struct ds_tracer ds; + + /* The trace including the DS configuration: */ + struct bts_trace trace; + + /* Buffer overflow notification function: */ + bts_ovfl_callback_t ovfl; + + /* Active flags affecting trace collection. */ + unsigned int flags; }; struct pebs_tracer { - /* the common DS part */ - struct ds_tracer ds; - /* the trace including the DS configuration */ - struct pebs_trace trace; - /* buffer overflow notification function */ - pebs_ovfl_callback_t ovfl; + /* The common DS part: */ + struct ds_tracer ds; + + /* The trace including the DS configuration: */ + struct pebs_trace trace; + + /* Buffer overflow notification function: */ + pebs_ovfl_callback_t ovfl; }; /* @@ -97,6 +120,7 @@ struct pebs_tracer { * * The DS configuration consists of the following fields; different * architetures vary in the size of those fields. + * * - double-word aligned base linear address of the BTS buffer * - write pointer into the BTS buffer * - end linear address of the BTS buffer (one byte beyond the end of @@ -135,21 +159,22 @@ enum ds_field { }; enum ds_qualifier { - ds_bts = 0, + ds_bts = 0, ds_pebs }; -static inline unsigned long ds_get(const unsigned char *base, - enum ds_qualifier qual, enum ds_field field) +static inline unsigned long +ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) { - base += (ds_cfg.sizeof_field * (field + (4 * qual))); + base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); return *(unsigned long *)base; } -static inline void ds_set(unsigned char *base, enum ds_qualifier qual, - enum ds_field field, unsigned long value) +static inline void +ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, + unsigned long value) { - base += (ds_cfg.sizeof_field * (field + (4 * qual))); + base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); (*(unsigned long *)base) = value; } @@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, */ static DEFINE_SPINLOCK(ds_lock); - /* * We either support (system-wide) per-cpu or per-thread allocation. * We distinguish the two based on the task_struct pointer, where a @@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock); */ static atomic_t tracers = ATOMIC_INIT(0); -static inline void get_tracer(struct task_struct *task) +static inline int get_tracer(struct task_struct *task) { - if (task) + int error; + + spin_lock_irq(&ds_lock); + + if (task) { + error = -EPERM; + if (atomic_read(&tracers) < 0) + goto out; atomic_inc(&tracers); - else + } else { + error = -EPERM; + if (atomic_read(&tracers) > 0) + goto out; atomic_dec(&tracers); + } + + error = 0; +out: + spin_unlock_irq(&ds_lock); + return error; } static inline void put_tracer(struct task_struct *task) @@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task) atomic_inc(&tracers); } -static inline int check_tracer(struct task_struct *task) -{ - return task ? - (atomic_read(&tracers) >= 0) : - (atomic_read(&tracers) <= 0); -} - - /* * The DS context is either attached to a thread or to a cpu: * - in the former case, the thread_struct contains a pointer to the @@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task) * deallocated when the last user puts the context. */ struct ds_context { - /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ - unsigned char ds[MAX_SIZEOF_DS]; - /* the owner of the BTS and PEBS configuration, respectively */ - struct bts_tracer *bts_master; - struct pebs_tracer *pebs_master; - /* use count */ - unsigned long count; - /* a pointer to the context location inside the thread_struct - * or the per_cpu context array */ - struct ds_context **this; - /* a pointer to the task owning this context, or NULL, if the - * context is owned by a cpu */ - struct task_struct *task; -}; + /* The DS configuration; goes into MSR_IA32_DS_AREA: */ + unsigned char ds[MAX_SIZEOF_DS]; + + /* The owner of the BTS and PEBS configuration, respectively: */ + struct bts_tracer *bts_master; + struct pebs_tracer *pebs_master; -static DEFINE_PER_CPU(struct ds_context *, system_context_array); + /* Use count: */ + unsigned long count; -#define system_context per_cpu(system_context_array, smp_processor_id()) + /* Pointer to the context pointer field: */ + struct ds_context **this; + + /* The traced task; NULL for cpu tracing: */ + struct task_struct *task; + + /* The traced cpu; only valid if task is NULL: */ + int cpu; +}; +static DEFINE_PER_CPU(struct ds_context *, cpu_context); -static inline struct ds_context *ds_get_context(struct task_struct *task) + +static struct ds_context *ds_get_context(struct task_struct *task, int cpu) { struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &system_context); + (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); struct ds_context *context = NULL; struct ds_context *new_context = NULL; - unsigned long irq; /* Chances are small that we already have a context. */ new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); if (!new_context) return NULL; - spin_lock_irqsave(&ds_lock, irq); + spin_lock_irq(&ds_lock); context = *p_context; - if (!context) { + if (likely(!context)) { context = new_context; context->this = p_context; context->task = task; + context->cpu = cpu; context->count = 0; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - - if (!task || (task == current)) - wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); - *p_context = context; } context->count++; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); if (context != new_context) kfree(new_context); @@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) return context; } -static inline void ds_put_context(struct ds_context *context) +static void ds_put_context(struct ds_context *context) { + struct task_struct *task; unsigned long irq; if (!context) @@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context) *(context->this) = NULL; - if (context->task) - clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + task = context->task; + + if (task) + clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); - if (!context->task || (context->task == current)) - wrmsrl(MSR_IA32_DS_AREA, 0); + /* + * We leave the (now dangling) pointer to the DS configuration in + * the DS_AREA msr. This is as good or as bad as replacing it with + * NULL - the hardware would crash if we enabled tracing. + * + * This saves us some problems with having to write an msr on a + * different cpu while preventing others from doing the same for the + * next context for that same cpu. + */ spin_unlock_irqrestore(&ds_lock, irq); + /* The context might still be in use for context switching. */ + if (task && (task != current)) + wait_task_context_switch(task); + kfree(context); } +static void ds_install_ds_area(struct ds_context *context) +{ + unsigned long ds; + + ds = (unsigned long)context->ds; + + /* + * There is a race between the bts master and the pebs master. + * + * The thread/cpu access is synchronized via get/put_cpu() for + * task tracing and via wrmsr_on_cpu for cpu tracing. + * + * If bts and pebs are collected for the same task or same cpu, + * the same confiuration is written twice. + */ + if (context->task) { + get_cpu(); + if (context->task == current) + wrmsrl(MSR_IA32_DS_AREA, ds); + set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + put_cpu(); + } else + wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, + (u32)((u64)ds), (u32)((u64)ds >> 32)); +} /* * Call the tracer's callback on a buffer overflow. @@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) * The remainder of any partially written record is zeroed out. * * context: the DS context - * qual: the buffer type - * record: the data to write - * size: the size of the data + * qual: the buffer type + * record: the data to write + * size: the size of the data */ static int ds_write(struct ds_context *context, enum ds_qualifier qual, const void *record, size_t size) @@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, unsigned long write_size, adj_write_size; /* - * write as much as possible without producing an + * Write as much as possible without producing an * overflow interrupt. * - * interrupt_threshold must either be + * Interrupt_threshold must either be * - bigger than absolute_maximum or * - point to a record between buffer_base and absolute_maximum * - * index points to a valid record. + * Index points to a valid record. */ base = ds_get(context->ds, qual, ds_buffer_base); index = ds_get(context->ds, qual, ds_index); @@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, write_end = min(end, int_th); - /* if we are already beyond the interrupt threshold, - * we fill the entire buffer */ + /* + * If we are already beyond the interrupt threshold, + * we fill the entire buffer. + */ if (write_end <= index) write_end = end; @@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; adj_write_size *= ds_cfg.sizeof_rec[qual]; - /* zero out trailing bytes */ + /* Zero out trailing bytes. */ memset((char *)index + write_size, 0, adj_write_size - write_size); index += adj_write_size; @@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, * Later architectures use 64bit pointers throughout, whereas earlier * architectures use 32bit pointers in 32bit mode. * - * We compute the base address for the first 8 fields based on: + * We compute the base address for the fields based on: * - the field size stored in the DS configuration * - the relative field position * @@ -431,23 +501,23 @@ enum bts_field { bts_to, bts_flags, - bts_qual = bts_from, - bts_jiffies = bts_to, - bts_pid = bts_flags, + bts_qual = bts_from, + bts_clock = bts_to, + bts_pid = bts_flags, - bts_qual_mask = (bts_qual_max - 1), - bts_escape = ((unsigned long)-1 & ~bts_qual_mask) + bts_qual_mask = (bts_qual_max - 1), + bts_escape = ((unsigned long)-1 & ~bts_qual_mask) }; static inline unsigned long bts_get(const char *base, enum bts_field field) { - base += (ds_cfg.sizeof_field * field); + base += (ds_cfg.sizeof_ptr_field * field); return *(unsigned long *)base; } static inline void bts_set(char *base, enum bts_field field, unsigned long val) { - base += (ds_cfg.sizeof_field * field);; + base += (ds_cfg.sizeof_ptr_field * field);; (*(unsigned long *)base) = val; } @@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val) * * return: bytes read/written on success; -Eerrno, otherwise */ -static int bts_read(struct bts_tracer *tracer, const void *at, - struct bts_struct *out) +static int +bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) { if (!tracer) return -EINVAL; @@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at, memset(out, 0, sizeof(*out)); if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); - out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); - out->variant.timestamp.pid = bts_get(at, bts_pid); + out->variant.event.clock = bts_get(at, bts_clock); + out->variant.event.pid = bts_get(at, bts_pid); } else { out->qualifier = bts_branch; out->variant.lbr.from = bts_get(at, bts_from); @@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) case bts_task_arrives: case bts_task_departs: bts_set(raw, bts_qual, (bts_escape | in->qualifier)); - bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); - bts_set(raw, bts_pid, in->variant.timestamp.pid); + bts_set(raw, bts_clock, in->variant.event.clock); + bts_set(raw, bts_pid, in->variant.event.pid); break; default: return -EINVAL; @@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, unsigned int flags) { unsigned long buffer, adj; - /* adjust the buffer address and size to meet alignment + /* + * Adjust the buffer address and size to meet alignment * constraints: * - buffer is double-word aligned * - size is multiple of record size @@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, trace->begin = (void *)buffer; trace->top = trace->begin; trace->end = (void *)(buffer + size); - /* The value for 'no threshold' is -1, which will set the + /* + * The value for 'no threshold' is -1, which will set the * threshold outside of the buffer, just like we want it. */ + ith *= ds_cfg.sizeof_rec[qual]; trace->ith = (void *)(buffer + size - ith); trace->flags = flags; @@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, enum ds_qualifier qual, struct task_struct *task, - void *base, size_t size, size_t th, unsigned int flags) + int cpu, void *base, size_t size, size_t th) { struct ds_context *context; int error; + size_t req_size; + + error = -EOPNOTSUPP; + if (!ds_cfg.sizeof_rec[qual]) + goto out; error = -EINVAL; if (!base) goto out; - /* we require some space to do alignment adjustments below */ + req_size = ds_cfg.sizeof_rec[qual]; + /* We might need space for alignment adjustments. */ + if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) + req_size += DS_ALIGNMENT; + error = -EINVAL; - if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) + if (size < req_size) goto out; if (th != (size_t)-1) { @@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, tracer->size = size; error = -ENOMEM; - context = ds_get_context(task); + context = ds_get_context(task, cpu); if (!context) goto out; tracer->context = context; - ds_init_ds_trace(trace, qual, base, size, th, flags); + /* + * Defer any tracer-specific initialization work for the context until + * context ownership has been clarified. + */ error = 0; out: return error; } -struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th, - unsigned int flags) +static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct bts_tracer *tracer; - unsigned long irq; int error; + /* Buffer overflow notification is not yet implemented. */ error = -EOPNOTSUPP; - if (!ds_cfg.ctl[dsf_bts]) + if (ovfl) goto out; - /* buffer overflow notification is not yet implemented */ - error = -EOPNOTSUPP; - if (ovfl) + error = get_tracer(task); + if (error < 0) goto out; error = -ENOMEM; tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); if (!tracer) - goto out; + goto out_put_tracer; tracer->ovfl = ovfl; + /* Do some more error checking and acquire a tracing context. */ error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_bts, task, base, size, th, flags); + ds_bts, task, cpu, base, size, th); if (error < 0) goto out_tracer; - - spin_lock_irqsave(&ds_lock, irq); - - error = -EPERM; - if (!check_tracer(task)) - goto out_unlock; - get_tracer(task); + /* Claim the bts part of the tracing context we acquired above. */ + spin_lock_irq(&ds_lock); error = -EPERM; if (tracer->ds.context->bts_master) - goto out_put_tracer; + goto out_unlock; tracer->ds.context->bts_master = tracer; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); + /* + * Now that we own the bts part of the context, let's complete the + * initialization for that part. + */ + ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_install_ds_area(tracer->ds.context); tracer->trace.read = bts_read; tracer->trace.write = bts_write; - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + /* Start tracing. */ ds_resume_bts(tracer); return tracer; - out_put_tracer: - put_tracer(task); out_unlock: - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); + out_put_tracer: + put_tracer(task); out: return ERR_PTR(error); } -struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, size_t th, - unsigned int flags) +struct bts_tracer *ds_request_bts_task(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_bts(task, 0, base, size, ovfl, th, flags); +} + +struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); +} + +static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct pebs_tracer *tracer; - unsigned long irq; int error; - /* buffer overflow notification is not yet implemented */ + /* Buffer overflow notification is not yet implemented. */ error = -EOPNOTSUPP; if (ovfl) goto out; + error = get_tracer(task); + if (error < 0) + goto out; + error = -ENOMEM; tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); if (!tracer) - goto out; + goto out_put_tracer; tracer->ovfl = ovfl; + /* Do some more error checking and acquire a tracing context. */ error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_pebs, task, base, size, th, flags); + ds_pebs, task, cpu, base, size, th); if (error < 0) goto out_tracer; - spin_lock_irqsave(&ds_lock, irq); - - error = -EPERM; - if (!check_tracer(task)) - goto out_unlock; - get_tracer(task); + /* Claim the pebs part of the tracing context we acquired above. */ + spin_lock_irq(&ds_lock); error = -EPERM; if (tracer->ds.context->pebs_master) - goto out_put_tracer; + goto out_unlock; tracer->ds.context->pebs_master = tracer; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); + /* + * Now that we own the pebs part of the context, let's complete the + * initialization for that part. + */ + ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); + ds_install_ds_area(tracer->ds.context); + + /* Start tracing. */ ds_resume_pebs(tracer); return tracer; - out_put_tracer: - put_tracer(task); out_unlock: - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); + out_put_tracer: + put_tracer(task); out: return ERR_PTR(error); } -void ds_release_bts(struct bts_tracer *tracer) +struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags) { - if (!tracer) - return; + return ds_request_pebs(task, 0, base, size, ovfl, th, flags); +} - ds_suspend_bts(tracer); +struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); +} + +static void ds_free_bts(struct bts_tracer *tracer) +{ + struct task_struct *task; + + task = tracer->ds.context->task; WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); tracer->ds.context->bts_master = NULL; - put_tracer(tracer->ds.context->task); + /* Make sure tracing stopped and the tracer is not in use. */ + if (task && (task != current)) + wait_task_context_switch(task); + ds_put_context(tracer->ds.context); + put_tracer(task); kfree(tracer); } +void ds_release_bts(struct bts_tracer *tracer) +{ + might_sleep(); + + if (!tracer) + return; + + ds_suspend_bts(tracer); + ds_free_bts(tracer); +} + +int ds_release_bts_noirq(struct bts_tracer *tracer) +{ + struct task_struct *task; + unsigned long irq; + int error; + + if (!tracer) + return 0; + + task = tracer->ds.context->task; + + local_irq_save(irq); + + error = -EPERM; + if (!task && + (tracer->ds.context->cpu != smp_processor_id())) + goto out; + + error = -EPERM; + if (task && (task != current)) + goto out; + + ds_suspend_bts_noirq(tracer); + ds_free_bts(tracer); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static void update_task_debugctlmsr(struct task_struct *task, + unsigned long debugctlmsr) +{ + task->thread.debugctlmsr = debugctlmsr; + + get_cpu(); + if (task == current) + update_debugctlmsr(debugctlmsr); + put_cpu(); +} + void ds_suspend_bts(struct bts_tracer *tracer) { struct task_struct *task; + unsigned long debugctlmsr; + int cpu; if (!tracer) return; + tracer->flags = 0; + task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; - if (!task || (task == current)) - update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); + WARN_ON(!task && irqs_disabled()); - if (task) { - task->thread.debugctlmsr &= ~BTS_CONTROL; + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr_on_cpu(cpu)); + debugctlmsr &= ~BTS_CONTROL; - if (!task->thread.debugctlmsr) - clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); - } + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr_on_cpu(cpu, debugctlmsr); } -void ds_resume_bts(struct bts_tracer *tracer) +int ds_suspend_bts_noirq(struct bts_tracer *tracer) { struct task_struct *task; - unsigned long control; + unsigned long debugctlmsr, irq; + int cpu, error = 0; if (!tracer) - return; + return 0; + + tracer->flags = 0; task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; + + local_irq_save(irq); + + error = -EPERM; + if (!task && (cpu != smp_processor_id())) + goto out; + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr()); + debugctlmsr &= ~BTS_CONTROL; + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr(debugctlmsr); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static unsigned long ds_bts_control(struct bts_tracer *tracer) +{ + unsigned long control; control = ds_cfg.ctl[dsf_bts]; if (!(tracer->trace.ds.flags & BTS_KERNEL)) @@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer) if (!(tracer->trace.ds.flags & BTS_USER)) control |= ds_cfg.ctl[dsf_bts_user]; - if (task) { - task->thread.debugctlmsr |= control; - set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); - } - - if (!task || (task == current)) - update_debugctlmsr(get_debugctlmsr() | control); + return control; } -void ds_release_pebs(struct pebs_tracer *tracer) +void ds_resume_bts(struct bts_tracer *tracer) { + struct task_struct *task; + unsigned long debugctlmsr; + int cpu; + if (!tracer) return; - ds_suspend_pebs(tracer); + tracer->flags = tracer->trace.ds.flags; + + task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; + + WARN_ON(!task && irqs_disabled()); + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr_on_cpu(cpu)); + debugctlmsr |= ds_bts_control(tracer); + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr_on_cpu(cpu, debugctlmsr); +} + +int ds_resume_bts_noirq(struct bts_tracer *tracer) +{ + struct task_struct *task; + unsigned long debugctlmsr, irq; + int cpu, error = 0; + + if (!tracer) + return 0; + + tracer->flags = tracer->trace.ds.flags; + + task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; + + local_irq_save(irq); + + error = -EPERM; + if (!task && (cpu != smp_processor_id())) + goto out; + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr()); + debugctlmsr |= ds_bts_control(tracer); + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr(debugctlmsr); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static void ds_free_pebs(struct pebs_tracer *tracer) +{ + struct task_struct *task; + + task = tracer->ds.context->task; WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); tracer->ds.context->pebs_master = NULL; - put_tracer(tracer->ds.context->task); ds_put_context(tracer->ds.context); + put_tracer(task); kfree(tracer); } +void ds_release_pebs(struct pebs_tracer *tracer) +{ + might_sleep(); + + if (!tracer) + return; + + ds_suspend_pebs(tracer); + ds_free_pebs(tracer); +} + +int ds_release_pebs_noirq(struct pebs_tracer *tracer) +{ + struct task_struct *task; + unsigned long irq; + int error; + + if (!tracer) + return 0; + + task = tracer->ds.context->task; + + local_irq_save(irq); + + error = -EPERM; + if (!task && + (tracer->ds.context->cpu != smp_processor_id())) + goto out; + + error = -EPERM; + if (task && (task != current)) + goto out; + + ds_suspend_pebs_noirq(tracer); + ds_free_pebs(tracer); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + void ds_suspend_pebs(struct pebs_tracer *tracer) { } +int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) +{ + return 0; +} + void ds_resume_pebs(struct pebs_tracer *tracer) { } +int ds_resume_pebs_noirq(struct pebs_tracer *tracer) +{ + return 0; +} + const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) { if (!tracer) @@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) return NULL; ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); - tracer->trace.reset_value = - *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); + + tracer->trace.counters = ds_cfg.nr_counter_reset; + memcpy(tracer->trace.counter_reset, + tracer->ds.context->ds + + (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), + ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); return &tracer->trace; } @@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer) tracer->trace.ds.top = tracer->trace.ds.begin; - ds_set(tracer->ds.context->ds, ds_bts, ds_index, + ds_set(tracer->ds.context->ds, ds_pebs, ds_index, (unsigned long)tracer->trace.ds.top); return 0; } -int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) +int ds_set_pebs_reset(struct pebs_tracer *tracer, + unsigned int counter, u64 value) { if (!tracer) return -EINVAL; - *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; + if (ds_cfg.nr_counter_reset < counter) + return -EINVAL; + + *(u64 *)(tracer->ds.context->ds + + (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + + (counter * PEBS_RESET_FIELD_SIZE)) = value; return 0; } @@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = { .ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_user] = (1 << 6), - - .sizeof_field = sizeof(long), - .sizeof_rec[ds_bts] = sizeof(long) * 3, -#ifdef __i386__ - .sizeof_rec[ds_pebs] = sizeof(long) * 10, -#else - .sizeof_rec[ds_pebs] = sizeof(long) * 18, -#endif + .nr_counter_reset = 1, }; static const struct ds_configuration ds_cfg_pentium_m = { .name = "Pentium M", .ctl[dsf_bts] = (1 << 6) | (1 << 7), - - .sizeof_field = sizeof(long), - .sizeof_rec[ds_bts] = sizeof(long) * 3, -#ifdef __i386__ - .sizeof_rec[ds_pebs] = sizeof(long) * 10, -#else - .sizeof_rec[ds_pebs] = sizeof(long) * 18, -#endif + .nr_counter_reset = 1, }; static const struct ds_configuration ds_cfg_core2_atom = { .name = "Core 2/Atom", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_user] = (1 << 10), - - .sizeof_field = 8, - .sizeof_rec[ds_bts] = 8 * 3, - .sizeof_rec[ds_pebs] = 8 * 18, + .nr_counter_reset = 1, +}; +static const struct ds_configuration ds_cfg_core_i7 = { + .name = "Core i7", + .ctl[dsf_bts] = (1 << 6) | (1 << 7), + .ctl[dsf_bts_kernel] = (1 << 9), + .ctl[dsf_bts_user] = (1 << 10), + .nr_counter_reset = 4, }; static void -ds_configure(const struct ds_configuration *cfg) +ds_configure(const struct ds_configuration *cfg, + struct cpuinfo_x86 *cpu) { + unsigned long nr_pebs_fields = 0; + + printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); + +#ifdef __i386__ + nr_pebs_fields = 10; +#else + nr_pebs_fields = 18; +#endif + + /* + * Starting with version 2, architectural performance + * monitoring supports a format specifier. + */ + if ((cpuid_eax(0xa) & 0xff) > 1) { + unsigned long perf_capabilities, format; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); + + format = (perf_capabilities >> 8) & 0xf; + + switch (format) { + case 0: + nr_pebs_fields = 18; + break; + case 1: + nr_pebs_fields = 22; + break; + default: + printk(KERN_INFO + "[ds] unknown PEBS format: %lu\n", format); + nr_pebs_fields = 0; + break; + } + } + memset(&ds_cfg, 0, sizeof(ds_cfg)); ds_cfg = *cfg; - printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); + ds_cfg.sizeof_ptr_field = + (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); + + ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; + ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; - if (!cpu_has_bts) { - ds_cfg.ctl[dsf_bts] = 0; + if (!cpu_has(cpu, X86_FEATURE_BTS)) { + ds_cfg.sizeof_rec[ds_bts] = 0; printk(KERN_INFO "[ds] bts not available\n"); } - if (!cpu_has_pebs) + if (!cpu_has(cpu, X86_FEATURE_PEBS)) { + ds_cfg.sizeof_rec[ds_pebs] = 0; printk(KERN_INFO "[ds] pebs not available\n"); + } + + printk(KERN_INFO "[ds] sizes: address: %u bit, ", + 8 * ds_cfg.sizeof_ptr_field); + printk("bts/pebs record: %u/%u bytes\n", + ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); - WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); + WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) { + /* Only configure the first cpu. Others are identical. */ + if (ds_cfg.name) + return; + switch (c->x86) { case 0x6: switch (c->x86_model) { case 0x9: case 0xd: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m); + ds_configure(&ds_cfg_pentium_m, c); break; case 0xf: case 0x17: /* Core2 */ case 0x1c: /* Atom */ - ds_configure(&ds_cfg_core2_atom); + ds_configure(&ds_cfg_core2_atom, c); + break; + case 0x1a: /* Core i7 */ + ds_configure(&ds_cfg_core_i7, c); break; - case 0x1a: /* i7 */ default: - /* sorry, don't know about them */ + /* Sorry, don't know about them. */ break; } break; @@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst); + ds_configure(&ds_cfg_netburst, c); break; default: - /* sorry, don't know about them */ + /* Sorry, don't know about them. */ break; } break; default: - /* sorry, don't know about them */ + /* Sorry, don't know about them. */ break; } } +static inline void ds_take_timestamp(struct ds_context *context, + enum bts_qualifier qualifier, + struct task_struct *task) +{ + struct bts_tracer *tracer = context->bts_master; + struct bts_struct ts; + + /* Prevent compilers from reading the tracer pointer twice. */ + barrier(); + + if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) + return; + + memset(&ts, 0, sizeof(ts)); + ts.qualifier = qualifier; + ts.variant.event.clock = trace_clock_global(); + ts.variant.event.pid = task->pid; + + bts_write(tracer, &ts); +} + /* * Change the DS configuration from tracing prev to tracing next. */ void ds_switch_to(struct task_struct *prev, struct task_struct *next) { - struct ds_context *prev_ctx = prev->thread.ds_ctx; - struct ds_context *next_ctx = next->thread.ds_ctx; + struct ds_context *prev_ctx = prev->thread.ds_ctx; + struct ds_context *next_ctx = next->thread.ds_ctx; + unsigned long debugctlmsr = next->thread.debugctlmsr; + + /* Make sure all data is read before we start. */ + barrier(); if (prev_ctx) { update_debugctlmsr(0); - if (prev_ctx->bts_master && - (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { - struct bts_struct ts = { - .qualifier = bts_task_departs, - .variant.timestamp.jiffies = jiffies_64, - .variant.timestamp.pid = prev->pid - }; - bts_write(prev_ctx->bts_master, &ts); - } + ds_take_timestamp(prev_ctx, bts_task_departs, prev); } if (next_ctx) { - if (next_ctx->bts_master && - (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { - struct bts_struct ts = { - .qualifier = bts_task_arrives, - .variant.timestamp.jiffies = jiffies_64, - .variant.timestamp.pid = next->pid - }; - bts_write(next_ctx->bts_master, &ts); - } + ds_take_timestamp(next_ctx, bts_task_arrives, next); wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); } - update_debugctlmsr(next->thread.debugctlmsr); + update_debugctlmsr(debugctlmsr); } -void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) +static __init int ds_selftest(void) { - clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); - tsk->thread.ds_ctx = NULL; -} + if (ds_cfg.sizeof_rec[ds_bts]) { + int error; -void ds_exit_thread(struct task_struct *tsk) -{ + error = ds_selftest_bts(); + if (error) { + WARN(1, "[ds] selftest failed. disabling bts.\n"); + ds_cfg.sizeof_rec[ds_bts] = 0; + } + } + + if (ds_cfg.sizeof_rec[ds_pebs]) { + int error; + + error = ds_selftest_pebs(); + if (error) { + WARN(1, "[ds] selftest failed. disabling pebs.\n"); + ds_cfg.sizeof_rec[ds_pebs] = 0; + } + } + + return 0; } +device_initcall(ds_selftest); diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c new file mode 100644 index 000000000000..6bc7c199ab99 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.c @@ -0,0 +1,408 @@ +/* + * Debug Store support - selftest + * + * + * Copyright (C) 2009 Intel Corporation. + * Markus Metzger <markus.t.metzger@intel.com>, 2009 + */ + +#include "ds_selftest.h" + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/smp.h> +#include <linux/cpu.h> + +#include <asm/ds.h> + + +#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ +#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ + +struct ds_selftest_bts_conf { + struct bts_tracer *tracer; + int error; + int (*suspend)(struct bts_tracer *); + int (*resume)(struct bts_tracer *); +}; + +static int ds_selftest_bts_consistency(const struct bts_trace *trace) +{ + int error = 0; + + if (!trace) { + printk(KERN_CONT "failed to access trace..."); + /* Bail out. Other tests are pointless. */ + return -1; + } + + if (!trace->read) { + printk(KERN_CONT "bts read not available..."); + error = -1; + } + + /* Do some sanity checks on the trace configuration. */ + if (!trace->ds.n) { + printk(KERN_CONT "empty bts buffer..."); + error = -1; + } + if (!trace->ds.size) { + printk(KERN_CONT "bad bts trace setup..."); + error = -1; + } + if (trace->ds.end != + (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { + printk(KERN_CONT "bad bts buffer setup..."); + error = -1; + } + /* + * We allow top in [begin; end], since its not clear when the + * overflow adjustment happens: after the increment or before the + * write. + */ + if ((trace->ds.top < trace->ds.begin) || + (trace->ds.end < trace->ds.top)) { + printk(KERN_CONT "bts top out of bounds..."); + error = -1; + } + + return error; +} + +static int ds_selftest_bts_read(struct bts_tracer *tracer, + const struct bts_trace *trace, + const void *from, const void *to) +{ + const unsigned char *at; + + /* + * Check a few things which do not belong to this test. + * They should be covered by other tests. + */ + if (!trace) + return -1; + + if (!trace->read) + return -1; + + if (to < from) + return -1; + + if (from < trace->ds.begin) + return -1; + + if (trace->ds.end < to) + return -1; + + if (!trace->ds.size) + return -1; + + /* Now to the test itself. */ + for (at = from; (void *)at < to; at += trace->ds.size) { + struct bts_struct bts; + unsigned long index; + int error; + + if (((void *)at - trace->ds.begin) % trace->ds.size) { + printk(KERN_CONT + "read from non-integer index..."); + return -1; + } + index = ((void *)at - trace->ds.begin) / trace->ds.size; + + memset(&bts, 0, sizeof(bts)); + error = trace->read(tracer, at, &bts); + if (error < 0) { + printk(KERN_CONT + "error reading bts trace at [%lu] (0x%p)...", + index, at); + return error; + } + + switch (bts.qualifier) { + case BTS_BRANCH: + break; + default: + printk(KERN_CONT + "unexpected bts entry %llu at [%lu] (0x%p)...", + bts.qualifier, index, at); + return -1; + } + } + + return 0; +} + +static void ds_selftest_bts_cpu(void *arg) +{ + struct ds_selftest_bts_conf *conf = arg; + const struct bts_trace *trace; + void *top; + + if (IS_ERR(conf->tracer)) { + conf->error = PTR_ERR(conf->tracer); + conf->tracer = NULL; + + printk(KERN_CONT + "initialization failed (err: %d)...", conf->error); + return; + } + + /* We should meanwhile have enough trace. */ + conf->error = conf->suspend(conf->tracer); + if (conf->error < 0) + return; + + /* Let's see if we can access the trace. */ + trace = ds_read_bts(conf->tracer); + + conf->error = ds_selftest_bts_consistency(trace); + if (conf->error < 0) + return; + + /* If everything went well, we should have a few trace entries. */ + if (trace->ds.top == trace->ds.begin) { + /* + * It is possible but highly unlikely that we got a + * buffer overflow and end up at exactly the same + * position we started from. + * Let's issue a warning, but continue. + */ + printk(KERN_CONT "no trace/overflow..."); + } + + /* Let's try to read the trace we collected. */ + conf->error = + ds_selftest_bts_read(conf->tracer, trace, + trace->ds.begin, trace->ds.top); + if (conf->error < 0) + return; + + /* + * Let's read the trace again. + * Since we suspended tracing, we should get the same result. + */ + top = trace->ds.top; + + trace = ds_read_bts(conf->tracer); + conf->error = ds_selftest_bts_consistency(trace); + if (conf->error < 0) + return; + + if (top != trace->ds.top) { + printk(KERN_CONT "suspend not working..."); + conf->error = -1; + return; + } + + /* Let's collect some more trace - see if resume is working. */ + conf->error = conf->resume(conf->tracer); + if (conf->error < 0) + return; + + conf->error = conf->suspend(conf->tracer); + if (conf->error < 0) + return; + + trace = ds_read_bts(conf->tracer); + + conf->error = ds_selftest_bts_consistency(trace); + if (conf->error < 0) + return; + + if (trace->ds.top == top) { + /* + * It is possible but highly unlikely that we got a + * buffer overflow and end up at exactly the same + * position we started from. + * Let's issue a warning and check the full trace. + */ + printk(KERN_CONT + "no resume progress/overflow..."); + + conf->error = + ds_selftest_bts_read(conf->tracer, trace, + trace->ds.begin, trace->ds.end); + } else if (trace->ds.top < top) { + /* + * We had a buffer overflow - the entire buffer should + * contain trace records. + */ + conf->error = + ds_selftest_bts_read(conf->tracer, trace, + trace->ds.begin, trace->ds.end); + } else { + /* + * It is quite likely that the buffer did not overflow. + * Let's just check the delta trace. + */ + conf->error = + ds_selftest_bts_read(conf->tracer, trace, top, + trace->ds.top); + } + if (conf->error < 0) + return; + + conf->error = 0; +} + +static int ds_suspend_bts_wrap(struct bts_tracer *tracer) +{ + ds_suspend_bts(tracer); + return 0; +} + +static int ds_resume_bts_wrap(struct bts_tracer *tracer) +{ + ds_resume_bts(tracer); + return 0; +} + +static void ds_release_bts_noirq_wrap(void *tracer) +{ + (void)ds_release_bts_noirq(tracer); +} + +static int ds_selftest_bts_bad_release_noirq(int cpu, + struct bts_tracer *tracer) +{ + int error = -EPERM; + + /* Try to release the tracer on the wrong cpu. */ + get_cpu(); + if (cpu != smp_processor_id()) { + error = ds_release_bts_noirq(tracer); + if (error != -EPERM) + printk(KERN_CONT "release on wrong cpu..."); + } + put_cpu(); + + return error ? 0 : -1; +} + +static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) +{ + struct bts_tracer *tracer; + int error; + + /* Try to request cpu tracing while task tracing is active. */ + tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, + (size_t)-1, BTS_KERNEL); + error = PTR_ERR(tracer); + if (!IS_ERR(tracer)) { + ds_release_bts(tracer); + error = 0; + } + + if (error != -EPERM) + printk(KERN_CONT "cpu/task tracing overlap..."); + + return error ? 0 : -1; +} + +static int ds_selftest_bts_bad_request_task(void *buffer) +{ + struct bts_tracer *tracer; + int error; + + /* Try to request cpu tracing while task tracing is active. */ + tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, + (size_t)-1, BTS_KERNEL); + error = PTR_ERR(tracer); + if (!IS_ERR(tracer)) { + error = 0; + ds_release_bts(tracer); + } + + if (error != -EPERM) + printk(KERN_CONT "task/cpu tracing overlap..."); + + return error ? 0 : -1; +} + +int ds_selftest_bts(void) +{ + struct ds_selftest_bts_conf conf; + unsigned char buffer[BUFFER_SIZE], *small_buffer; + unsigned long irq; + int cpu; + + printk(KERN_INFO "[ds] bts selftest..."); + conf.error = 0; + + small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; + + get_online_cpus(); + for_each_online_cpu(cpu) { + conf.suspend = ds_suspend_bts_wrap; + conf.resume = ds_resume_bts_wrap; + conf.tracer = + ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + ds_selftest_bts_cpu(&conf); + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_task(buffer); + ds_release_bts(conf.tracer); + if (conf.error < 0) + goto out; + + conf.suspend = ds_suspend_bts_noirq; + conf.resume = ds_resume_bts_noirq; + conf.tracer = + ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); + if (conf.error >= 0) { + conf.error = + ds_selftest_bts_bad_release_noirq(cpu, + conf.tracer); + /* We must not release the tracer twice. */ + if (conf.error < 0) + conf.tracer = NULL; + } + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_task(buffer); + smp_call_function_single(cpu, ds_release_bts_noirq_wrap, + conf.tracer, 1); + if (conf.error < 0) + goto out; + } + + conf.suspend = ds_suspend_bts_wrap; + conf.resume = ds_resume_bts_wrap; + conf.tracer = + ds_request_bts_task(current, buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + ds_selftest_bts_cpu(&conf); + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); + ds_release_bts(conf.tracer); + if (conf.error < 0) + goto out; + + conf.suspend = ds_suspend_bts_noirq; + conf.resume = ds_resume_bts_noirq; + conf.tracer = + ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + local_irq_save(irq); + ds_selftest_bts_cpu(&conf); + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); + ds_release_bts_noirq(conf.tracer); + local_irq_restore(irq); + if (conf.error < 0) + goto out; + + conf.error = 0; + out: + put_online_cpus(); + printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); + + return conf.error; +} + +int ds_selftest_pebs(void) +{ + return 0; +} diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h new file mode 100644 index 000000000000..2ba8745c6663 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.h @@ -0,0 +1,15 @@ +/* + * Debug Store support - selftest + * + * + * Copyright (C) 2009 Intel Corporation. + * Markus Metzger <markus.t.metzger@intel.com>, 2009 + */ + +#ifdef CONFIG_X86_DS_SELFTEST +extern int ds_selftest_bts(void); +extern int ds_selftest_pebs(void); +#else +static inline int ds_selftest_bts(void) { return 0; } +static inline int ds_selftest_pebs(void) { return 0; } +#endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bb01ce080b80..1c17d7c751a4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -147,27 +147,14 @@ END(ftrace_graph_caller) GLOBAL(return_to_handler) subq $80, %rsp + /* Save the return values */ movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - movq %r10, 56(%rsp) - movq %r11, 64(%rsp) + movq %rdx, 8(%rsp) call ftrace_return_to_handler movq %rax, 72(%rsp) - movq 64(%rsp), %r11 - movq 56(%rsp), %r10 - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx + movq 8(%rsp), %rdx movq (%rsp), %rax addq $72, %rsp retq diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e22d63bdc8ff..3bb2be1649bd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -16,6 +16,7 @@ #include <asm/idle.h> #include <asm/uaccess.h> #include <asm/i387.h> +#include <asm/ds.h> unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -47,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } + + WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } void free_thread_info(struct thread_info *ti) @@ -85,8 +88,6 @@ void exit_thread(void) put_cpu(); kfree(bp); } - - ds_exit_thread(current); } void flush_thread(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c60924b5d123..59f4524984af 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -287,7 +287,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_max = 0; } - ds_copy_thread(p, current); + clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); + p->thread.ds_ctx = NULL; clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); p->thread.debugctlmsr = 0; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 45f010fb2e20..ebefb5407b9d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -332,7 +332,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, goto out; } - ds_copy_thread(p, me); + clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); + p->thread.ds_ctx = NULL; clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); p->thread.debugctlmsr = 0; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 23b7c8f017e2..09ecbde91c13 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/audit.h> #include <linux/seccomp.h> #include <linux/signal.h> +#include <linux/workqueue.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target, } #ifdef CONFIG_X86_PTRACE_BTS +/* + * A branch trace store context. + * + * Contexts may only be installed by ptrace_bts_config() and only for + * ptraced tasks. + * + * Contexts are destroyed when the tracee is detached from the tracer. + * The actual destruction work requires interrupts enabled, so the + * work is deferred and will be scheduled during __ptrace_unlink(). + * + * Contexts hold an additional task_struct reference on the traced + * task, as well as a reference on the tracer's mm. + * + * Ptrace already holds a task_struct for the duration of ptrace operations, + * but since destruction is deferred, it may be executed after both + * tracer and tracee exited. + */ +struct bts_context { + /* The branch trace handle. */ + struct bts_tracer *tracer; + + /* The buffer used to store the branch trace and its size. */ + void *buffer; + unsigned int size; + + /* The mm that paid for the above buffer. */ + struct mm_struct *mm; + + /* The task this context belongs to. */ + struct task_struct *task; + + /* The signal to send on a bts buffer overflow. */ + unsigned int bts_ovfl_signal; + + /* The work struct to destroy a context. */ + struct work_struct work; +}; + +static int alloc_bts_buffer(struct bts_context *context, unsigned int size) +{ + void *buffer = NULL; + int err = -ENOMEM; + + err = account_locked_memory(current->mm, current->signal->rlim, size); + if (err < 0) + return err; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto out_refund; + + context->buffer = buffer; + context->size = size; + context->mm = get_task_mm(current); + + return 0; + + out_refund: + refund_locked_memory(current->mm, size); + return err; +} + +static inline void free_bts_buffer(struct bts_context *context) +{ + if (!context->buffer) + return; + + kfree(context->buffer); + context->buffer = NULL; + + refund_locked_memory(context->mm, context->size); + context->size = 0; + + mmput(context->mm); + context->mm = NULL; +} + +static void free_bts_context_work(struct work_struct *w) +{ + struct bts_context *context; + + context = container_of(w, struct bts_context, work); + + ds_release_bts(context->tracer); + put_task_struct(context->task); + free_bts_buffer(context); + kfree(context); +} + +static inline void free_bts_context(struct bts_context *context) +{ + INIT_WORK(&context->work, free_bts_context_work); + schedule_work(&context->work); +} + +static inline struct bts_context *alloc_bts_context(struct task_struct *task) +{ + struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); + if (context) { + context->task = task; + task->bts = context; + + get_task_struct(task); + } + + return context; +} + static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { + struct bts_context *context; const struct bts_trace *trace; struct bts_struct bts; const unsigned char *at; int error; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; at = trace->ds.top - ((index + 1) * trace->ds.size); if ((void *)at < trace->ds.begin) @@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, if (!trace->read) return -EOPNOTSUPP; - error = trace->read(child->bts, at, &bts); + error = trace->read(context->tracer, at, &bts); if (error < 0) return error; @@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { + struct bts_context *context; const struct bts_trace *trace; const unsigned char *at; int error, drained = 0; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; if (!trace->read) return -EOPNOTSUPP; @@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child, for (at = trace->ds.begin; (void *)at < trace->ds.top; out++, drained++, at += trace->ds.size) { struct bts_struct bts; - int error; - error = trace->read(child->bts, at, &bts); + error = trace->read(context->tracer, at, &bts); if (error < 0) return error; @@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child, memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - error = ds_reset_bts(child->bts); + error = ds_reset_bts(context->tracer); if (error < 0) return error; return drained; } -static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) -{ - child->bts_buffer = alloc_locked_buffer(size); - if (!child->bts_buffer) - return -ENOMEM; - - child->bts_size = size; - - return 0; -} - -static void ptrace_bts_free_buffer(struct task_struct *child) -{ - free_locked_buffer(child->bts_buffer, child->bts_size); - child->bts_buffer = NULL; - child->bts_size = 0; -} - static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { + struct bts_context *context; struct ptrace_bts_config cfg; unsigned int flags = 0; @@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child, if (copy_from_user(&cfg, ucfg, sizeof(cfg))) return -EFAULT; - if (child->bts) { - ds_release_bts(child->bts); - child->bts = NULL; - } + context = child->bts; + if (!context) + context = alloc_bts_context(child); + if (!context) + return -ENOMEM; if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (!cfg.signal) return -EINVAL; - child->thread.bts_ovfl_signal = cfg.signal; return -EOPNOTSUPP; + context->bts_ovfl_signal = cfg.signal; } - if ((cfg.flags & PTRACE_BTS_O_ALLOC) && - (cfg.size != child->bts_size)) { - int error; + ds_release_bts(context->tracer); + context->tracer = NULL; - ptrace_bts_free_buffer(child); + if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { + int err; - error = ptrace_bts_allocate_buffer(child, cfg.size); - if (error < 0) - return error; + free_bts_buffer(context); + if (!cfg.size) + return 0; + + err = alloc_bts_buffer(context, cfg.size); + if (err < 0) + return err; } if (cfg.flags & PTRACE_BTS_O_TRACE) @@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child, if (cfg.flags & PTRACE_BTS_O_SCHED) flags |= BTS_TIMESTAMPS; - child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, - /* ovfl = */ NULL, /* th = */ (size_t)-1, - flags); - if (IS_ERR(child->bts)) { - int error = PTR_ERR(child->bts); - - ptrace_bts_free_buffer(child); - child->bts = NULL; + context->tracer = + ds_request_bts_task(child, context->buffer, context->size, + NULL, (size_t)-1, flags); + if (unlikely(IS_ERR(context->tracer))) { + int error = PTR_ERR(context->tracer); + free_bts_buffer(context); + context->tracer = NULL; return error; } @@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { + struct bts_context *context; const struct bts_trace *trace; struct ptrace_bts_config cfg; + context = child->bts; + if (!context) + return -ESRCH; + if (cfg_size < sizeof(cfg)) return -EIO; - trace = ds_read_bts(child->bts); + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; memset(&cfg, 0, sizeof(cfg)); - cfg.size = trace->ds.end - trace->ds.begin; - cfg.signal = child->thread.bts_ovfl_signal; - cfg.bts_size = sizeof(struct bts_struct); + cfg.size = trace->ds.end - trace->ds.begin; + cfg.signal = context->bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); if (cfg.signal) cfg.flags |= PTRACE_BTS_O_SIGNAL; @@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child, static int ptrace_bts_clear(struct task_struct *child) { + struct bts_context *context; const struct bts_trace *trace; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - return ds_reset_bts(child->bts); + return ds_reset_bts(context->tracer); } static int ptrace_bts_size(struct task_struct *child) { + struct bts_context *context; const struct bts_trace *trace; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; return (trace->ds.top - trace->ds.begin) / trace->ds.size; } -static void ptrace_bts_fork(struct task_struct *tsk) -{ - tsk->bts = NULL; - tsk->bts_buffer = NULL; - tsk->bts_size = 0; - tsk->thread.bts_ovfl_signal = 0; -} - -static void ptrace_bts_untrace(struct task_struct *child) +/* + * Called from __ptrace_unlink() after the child has been moved back + * to its original parent. + */ +void ptrace_bts_untrace(struct task_struct *child) { if (unlikely(child->bts)) { - ds_release_bts(child->bts); + free_bts_context(child->bts); child->bts = NULL; - - /* We cannot update total_vm and locked_vm since - child's mm is already gone. But we can reclaim the - memory. */ - kfree(child->bts_buffer); - child->bts_buffer = NULL; - child->bts_size = 0; } } - -static void ptrace_bts_detach(struct task_struct *child) -{ - /* - * Ptrace_detach() races with ptrace_untrace() in case - * the child dies and is reaped by another thread. - * - * We only do the memory accounting at this point and - * leave the buffer deallocation and the bts tracer - * release to ptrace_bts_untrace() which will be called - * later on with tasklist_lock held. - */ - release_locked_buffer(child->bts_buffer, child->bts_size); -} -#else -static inline void ptrace_bts_fork(struct task_struct *tsk) {} -static inline void ptrace_bts_detach(struct task_struct *child) {} -static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ -void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - ptrace_bts_fork(child); -} - -void x86_ptrace_untrace(struct task_struct *child) -{ - ptrace_bts_untrace(child); -} - /* * Called by kernel/ptrace.c when detaching.. * @@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - ptrace_bts_detach(child); } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index f7bddc2e37d1..4aaf7e48394f 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -20,7 +20,7 @@ save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) static int save_stack_stack(void *data, char *name) { - return -1; + return 0; } static void save_stack_address(void *data, unsigned long addr, int reliable) diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ff5c8736b491..734f92c02dde 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -334,3 +334,4 @@ ENTRY(sys_call_table) .long sys_inotify_init1 .long sys_preadv .long sys_pwritev + .long sys_rt_tgsigqueueinfo /* 335 */ diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 50dc802a1c46..16ccbd77917f 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -32,7 +32,7 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; unsigned long page; /* location of the fault page */ - bool old_presence; /* page presence prior to arming */ + pteval_t old_presence; /* page presence prior to arming */ bool armed; /* @@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) { struct list_head *head; - struct kmmio_fault_page *p; + struct kmmio_fault_page *f; page &= PAGE_MASK; head = kmmio_page_list(page); - list_for_each_entry_rcu(p, head, list) { - if (p->page == page) - return p; + list_for_each_entry_rcu(f, head, list) { + if (f->page == page) + return f; } return NULL; } -static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) +static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) { pmdval_t v = pmd_val(*pmd); - *old = !!(v & _PAGE_PRESENT); - v &= ~_PAGE_PRESENT; - if (present) - v |= _PAGE_PRESENT; + if (clear) { + *old = v & _PAGE_PRESENT; + v &= ~_PAGE_PRESENT; + } else /* presume this has been called with clear==true previously */ + v |= *old; set_pmd(pmd, __pmd(v)); } -static void set_pte_presence(pte_t *pte, bool present, bool *old) +static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) { pteval_t v = pte_val(*pte); - *old = !!(v & _PAGE_PRESENT); - v &= ~_PAGE_PRESENT; - if (present) - v |= _PAGE_PRESENT; + if (clear) { + *old = v & _PAGE_PRESENT; + v &= ~_PAGE_PRESENT; + } else /* presume this has been called with clear==true previously */ + v |= *old; set_pte_atomic(pte, __pte(v)); } -static int set_page_presence(unsigned long addr, bool present, bool *old) +static int clear_page_presence(struct kmmio_fault_page *f, bool clear) { unsigned int level; - pte_t *pte = lookup_address(addr, &level); + pte_t *pte = lookup_address(f->page, &level); if (!pte) { - pr_err("kmmio: no pte for page 0x%08lx\n", addr); + pr_err("kmmio: no pte for page 0x%08lx\n", f->page); return -1; } switch (level) { case PG_LEVEL_2M: - set_pmd_presence((pmd_t *)pte, present, old); + clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence); break; case PG_LEVEL_4K: - set_pte_presence(pte, present, old); + clear_pte_presence(pte, clear, &f->old_presence); break; default: pr_err("kmmio: unexpected page level 0x%x.\n", level); return -1; } - __flush_tlb_one(addr); + __flush_tlb_one(f->page); return 0; } @@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); if (f->armed) { pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, f->old_presence); + f->page, f->count, !!f->old_presence); } - ret = set_page_presence(f->page, false, &f->old_presence); + ret = clear_page_presence(f, true); WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); f->armed = true; return ret; @@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) /** Restore the given page to saved presence state. */ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { - bool tmp; - int ret = set_page_presence(f->page, f->old_presence, &tmp); + int ret = clear_page_presence(f, false); WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); f->armed = false; @@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + /* + * debug traps without an active context are due to either + * something external causing them (f.e. using a debugger while + * mmio tracing enabled), or erroneous behaviour + */ + pr_warning("kmmio: unexpected debug trap on CPU %d.\n", smp_processor_id()); goto out; } @@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) head, struct kmmio_delayed_release, rcu); - struct kmmio_fault_page *p = dr->release_list; - while (p) { - struct kmmio_fault_page *next = p->release_next; - BUG_ON(p->count); - kfree(p); - p = next; + struct kmmio_fault_page *f = dr->release_list; + while (f) { + struct kmmio_fault_page *next = f->release_next; + BUG_ON(f->count); + kfree(f); + f = next; } kfree(dr); } @@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) { struct kmmio_delayed_release *dr = container_of(head, struct kmmio_delayed_release, rcu); - struct kmmio_fault_page *p = dr->release_list; + struct kmmio_fault_page *f = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; spin_lock_irqsave(&kmmio_lock, flags); - while (p) { - if (!p->count) { - list_del_rcu(&p->list); - prevp = &p->release_next; + while (f) { + if (!f->count) { + list_del_rcu(&f->list); + prevp = &f->release_next; } else { - *prevp = p->release_next; + *prevp = f->release_next; } - p = p->release_next; + f = f->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); @@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p) } EXPORT_SYMBOL(unregister_kmmio_probe); -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args) +static int +kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) { struct die_args *arg = args; @@ -544,11 +550,23 @@ static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; -static int __init init_kmmio(void) +int kmmio_init(void) { int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); } -fs_initcall(init_kmmio); /* should be before device_initcall() */ + +void kmmio_cleanup(void) +{ + int i; + + unregister_die_notifier(&nb_die); + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) { + WARN_ONCE(!list_empty(&kmmio_page_table[i]), + KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"); + } +} diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index c9342ed8b402..132772a8ec57 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -451,6 +451,7 @@ void enable_mmiotrace(void) if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); + kmmio_init(); enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); @@ -473,6 +474,7 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); + kmmio_cleanup(); pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864ad49a7..3b285e656e27 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -356,14 +356,11 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ -static int p4force; -module_param(p4force, int, 0); - static int __init p4_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; - if (!p4force && (cpu_model > 6 || cpu_model == 5)) + if (cpu_model > 6 || cpu_model == 5) return 0; #ifndef CONFIG_SMP @@ -389,10 +386,25 @@ static int __init p4_init(char **cpu_type) return 0; } +static int force_arch_perfmon; +static int force_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "archperfmon")) { + force_arch_perfmon = 1; + printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); + } + + return 0; +} +module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); + static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; + if (force_arch_perfmon && cpu_has_arch_perfmon) + return 0; + switch (cpu_model) { case 0 ... 2: *cpu_type = "i386/ppro"; @@ -414,6 +426,13 @@ static int __init ppro_init(char **cpu_type) case 15: case 23: *cpu_type = "i386/core_2"; break; + case 26: + arch_perfmon_setup_counters(); + *cpu_type = "i386/core_i7"; + break; + case 28: + *cpu_type = "i386/atom"; + break; default: /* Unknown */ return 0; diff --git a/block/blk-core.c b/block/blk-core.c index c89883be8737..648f15cb41f1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,22 +28,14 @@ #include <linux/task_io_accounting_ops.h> #include <linux/blktrace_api.h> #include <linux/fault-inject.h> -#include <trace/block.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/block.h> #include "blk.h" -DEFINE_TRACE(block_plug); -DEFINE_TRACE(block_unplug_io); -DEFINE_TRACE(block_unplug_timer); -DEFINE_TRACE(block_getrq); -DEFINE_TRACE(block_sleeprq); -DEFINE_TRACE(block_rq_requeue); -DEFINE_TRACE(block_bio_backmerge); -DEFINE_TRACE(block_bio_frontmerge); -DEFINE_TRACE(block_bio_queue); -DEFINE_TRACE(block_rq_complete); -DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); @@ -1277,7 +1269,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_bdev = bdev->bd_contains; trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, bio->bi_sector, + bdev->bd_dev, bio->bi_sector - p->start_sect); } } @@ -1446,8 +1438,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (old_sector != -1) - trace_block_remap(q, bio, old_dev, bio->bi_sector, - old_sector); + trace_block_remap(q, bio, old_dev, old_sector); trace_block_bio_queue(q, bio); @@ -1741,10 +1732,14 @@ static int __end_that_request_first(struct request *req, int error, trace_block_rq_complete(req->q, req); /* - * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual - * sense key with us all the way through + * For fs requests, rq is just carrier of independent bio's + * and each partial completion should be handled separately. + * Reset per-request error on each partial completion. + * + * TODO: tj: This is too subtle. It would be better to let + * low level drivers do what they see fit. */ - if (!blk_pc_request(req)) + if (blk_fs_request(req)) req->errors = 0; if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3ff9bba3379a..26f9ec28f56c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -383,16 +383,21 @@ struct kobj_type blk_queue_ktype = { int blk_register_queue(struct gendisk *disk) { int ret; + struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; if (WARN_ON(!q)) return -ENXIO; + ret = blk_trace_init_sysfs(dev); + if (ret) + return ret; + if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) return ret; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615dea46b..f8c218cd08e1 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) memcpy(&buts.name, &cbuts.name, 32); mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); + ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts); mutex_unlock(&bdev->bd_mutex); if (ret) return ret; diff --git a/block/elevator.c b/block/elevator.c index 7073a9072577..e220f0c543e3 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,17 +33,16 @@ #include <linux/compiler.h> #include <linux/delay.h> #include <linux/blktrace_api.h> -#include <trace/block.h> #include <linux/hash.h> #include <linux/uaccess.h> +#include <trace/events/block.h> + #include "blk.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); -DEFINE_TRACE(block_rq_abort); - /* * Merge hash stuff. */ @@ -55,9 +54,6 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -DEFINE_TRACE(block_rq_insert); -DEFINE_TRACE(block_rq_issue); - /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index af761dc434f6..4895f0e05322 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty) /* FIXME: why is this needed. Note don't use ldisc_ref here as the open path is before the ldisc is referencable */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); return 0; @@ -463,7 +463,6 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file * file, clear_bit(HCI_UART_PROTO_SET, &hu->flags); return err; } - tty->low_latency = 1; } else return -EBUSY; break; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 735bbe2be51a..02ecfd5fa61c 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -97,6 +97,19 @@ config DEVKMEM kind of kernel debugging operations. When in doubt, say "N". +config BFIN_JTAG_COMM + tristate "Blackfin JTAG Communication" + depends on BLACKFIN + help + Add support for emulating a TTY device over the Blackfin JTAG. + + To compile this driver as a module, choose M here: the + module will be called bfin_jtag_comm. + +config BFIN_JTAG_COMM_CONSOLE + bool "Console on Blackfin JTAG" + depends on BFIN_JTAG_COMM=y + config SERIAL_NONSTANDARD bool "Non-standard serial port support" depends on HAS_IOMEM diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 9caf5b5ad1c0..189efcff08ce 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_LEGACY_PTYS) += pty.o obj-$(CONFIG_UNIX98_PTYS) += pty.o obj-y += misc.o obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o selection.o keyboard.o +obj-$(CONFIG_BFIN_JTAG_COMM) += bfin_jtag_comm.o obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o obj-$(CONFIG_AUDIT) += tty_audit.o diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c new file mode 100644 index 000000000000..44c113d56045 --- /dev/null +++ b/drivers/char/bfin_jtag_comm.c @@ -0,0 +1,365 @@ +/* + * TTY over Blackfin JTAG Communication + * + * Copyright 2008-2009 Analog Devices Inc. + * + * Enter bugs at http://blackfin.uclinux.org/ + * + * Licensed under the GPL-2 or later. + */ + +#include <linux/circ_buf.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/tty.h> +#include <linux/tty_driver.h> +#include <linux/tty_flip.h> +#include <asm/atomic.h> + +/* See the Debug/Emulation chapter in the HRM */ +#define EMUDOF 0x00000001 /* EMUDAT_OUT full & valid */ +#define EMUDIF 0x00000002 /* EMUDAT_IN full & valid */ +#define EMUDOOVF 0x00000004 /* EMUDAT_OUT overflow */ +#define EMUDIOVF 0x00000008 /* EMUDAT_IN overflow */ + +#define DRV_NAME "bfin-jtag-comm" +#define DEV_NAME "ttyBFJC" + +#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); }) +#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args) + +static inline uint32_t bfin_write_emudat(uint32_t emudat) +{ + __asm__ __volatile__("emudat = %0;" : : "d"(emudat)); + return emudat; +} + +static inline uint32_t bfin_read_emudat(void) +{ + uint32_t emudat; + __asm__ __volatile__("%0 = emudat;" : "=d"(emudat)); + return emudat; +} + +static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d) +{ + return bfin_write_emudat((a << 0) | (b << 8) | (c << 16) | (d << 24)); +} + +#define CIRC_SIZE 2048 /* see comment in tty_io.c:do_tty_write() */ +#define CIRC_MASK (CIRC_SIZE - 1) +#define circ_empty(circ) ((circ)->head == (circ)->tail) +#define circ_free(circ) CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE) +#define circ_cnt(circ) CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE) +#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK]) + +static struct tty_driver *bfin_jc_driver; +static struct task_struct *bfin_jc_kthread; +static struct tty_struct * volatile bfin_jc_tty; +static unsigned long bfin_jc_count; +static DEFINE_MUTEX(bfin_jc_tty_mutex); +static volatile struct circ_buf bfin_jc_write_buf; + +static int +bfin_jc_emudat_manager(void *arg) +{ + uint32_t inbound_len = 0, outbound_len = 0; + + while (!kthread_should_stop()) { + /* no one left to give data to, so sleep */ + if (bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) { + debug("waiting for readers\n"); + __set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + } + + /* no data available, so just chill */ + if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) { + debug("waiting for data (in_len = %i) (circ: %i %i)\n", + inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head); + if (inbound_len) + schedule(); + else + schedule_timeout_interruptible(HZ); + continue; + } + + /* if incoming data is ready, eat it */ + if (bfin_read_DBGSTAT() & EMUDIF) { + struct tty_struct *tty; + mutex_lock(&bfin_jc_tty_mutex); + tty = (struct tty_struct *)bfin_jc_tty; + if (tty != NULL) { + uint32_t emudat = bfin_read_emudat(); + if (inbound_len == 0) { + debug("incoming length: 0x%08x\n", emudat); + inbound_len = emudat; + } else { + size_t num_chars = (4 <= inbound_len ? 4 : inbound_len); + debug(" incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars); + inbound_len -= num_chars; + tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars); + tty_flip_buffer_push(tty); + } + } + mutex_unlock(&bfin_jc_tty_mutex); + } + + /* if outgoing data is ready, post it */ + if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) { + if (outbound_len == 0) { + outbound_len = circ_cnt(&bfin_jc_write_buf); + bfin_write_emudat(outbound_len); + debug("outgoing length: 0x%08x\n", outbound_len); + } else { + struct tty_struct *tty; + int tail = bfin_jc_write_buf.tail; + size_t ate = (4 <= outbound_len ? 4 : outbound_len); + uint32_t emudat = + bfin_write_emudat_chars( + circ_byte(&bfin_jc_write_buf, tail + 0), + circ_byte(&bfin_jc_write_buf, tail + 1), + circ_byte(&bfin_jc_write_buf, tail + 2), + circ_byte(&bfin_jc_write_buf, tail + 3) + ); + bfin_jc_write_buf.tail += ate; + outbound_len -= ate; + mutex_lock(&bfin_jc_tty_mutex); + tty = (struct tty_struct *)bfin_jc_tty; + if (tty) + tty_wakeup(tty); + mutex_unlock(&bfin_jc_tty_mutex); + debug(" outgoing data: 0x%08x (pushing %zu)\n", emudat, ate); + } + } + } + + __set_current_state(TASK_RUNNING); + return 0; +} + +static int +bfin_jc_open(struct tty_struct *tty, struct file *filp) +{ + mutex_lock(&bfin_jc_tty_mutex); + debug("open %lu\n", bfin_jc_count); + ++bfin_jc_count; + bfin_jc_tty = tty; + wake_up_process(bfin_jc_kthread); + mutex_unlock(&bfin_jc_tty_mutex); + return 0; +} + +static void +bfin_jc_close(struct tty_struct *tty, struct file *filp) +{ + mutex_lock(&bfin_jc_tty_mutex); + debug("close %lu\n", bfin_jc_count); + if (--bfin_jc_count == 0) + bfin_jc_tty = NULL; + wake_up_process(bfin_jc_kthread); + mutex_unlock(&bfin_jc_tty_mutex); +} + +/* XXX: we dont handle the put_char() case where we must handle count = 1 */ +static int +bfin_jc_circ_write(const unsigned char *buf, int count) +{ + int i; + count = min(count, circ_free(&bfin_jc_write_buf)); + debug("going to write chunk of %i bytes\n", count); + for (i = 0; i < count; ++i) + circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i]; + bfin_jc_write_buf.head += i; + return i; +} + +#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE +# define acquire_console_sem() +# define release_console_sem() +#endif +static int +bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count) +{ + int i; + acquire_console_sem(); + i = bfin_jc_circ_write(buf, count); + release_console_sem(); + wake_up_process(bfin_jc_kthread); + return i; +} + +static void +bfin_jc_flush_chars(struct tty_struct *tty) +{ + wake_up_process(bfin_jc_kthread); +} + +static int +bfin_jc_write_room(struct tty_struct *tty) +{ + return circ_free(&bfin_jc_write_buf); +} + +static int +bfin_jc_chars_in_buffer(struct tty_struct *tty) +{ + return circ_cnt(&bfin_jc_write_buf); +} + +static void +bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout) +{ + unsigned long expire = jiffies + timeout; + while (!circ_empty(&bfin_jc_write_buf)) { + if (signal_pending(current)) + break; + if (time_after(jiffies, expire)) + break; + } +} + +static struct tty_operations bfin_jc_ops = { + .open = bfin_jc_open, + .close = bfin_jc_close, + .write = bfin_jc_write, + /*.put_char = bfin_jc_put_char,*/ + .flush_chars = bfin_jc_flush_chars, + .write_room = bfin_jc_write_room, + .chars_in_buffer = bfin_jc_chars_in_buffer, + .wait_until_sent = bfin_jc_wait_until_sent, +}; + +static int __init bfin_jc_init(void) +{ + int ret; + + bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME); + if (IS_ERR(bfin_jc_kthread)) + return PTR_ERR(bfin_jc_kthread); + + ret = -ENOMEM; + + bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0; + bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL); + if (!bfin_jc_write_buf.buf) + goto err; + + bfin_jc_driver = alloc_tty_driver(1); + if (!bfin_jc_driver) + goto err; + + bfin_jc_driver->owner = THIS_MODULE; + bfin_jc_driver->driver_name = DRV_NAME; + bfin_jc_driver->name = DEV_NAME; + bfin_jc_driver->type = TTY_DRIVER_TYPE_SERIAL; + bfin_jc_driver->subtype = SERIAL_TYPE_NORMAL; + bfin_jc_driver->init_termios = tty_std_termios; + tty_set_operations(bfin_jc_driver, &bfin_jc_ops); + + ret = tty_register_driver(bfin_jc_driver); + if (ret) + goto err; + + pr_init(KERN_INFO DRV_NAME ": initialized\n"); + + return 0; + + err: + put_tty_driver(bfin_jc_driver); + kfree(bfin_jc_write_buf.buf); + kthread_stop(bfin_jc_kthread); + return ret; +} +module_init(bfin_jc_init); + +static void __exit bfin_jc_exit(void) +{ + kthread_stop(bfin_jc_kthread); + kfree(bfin_jc_write_buf.buf); + tty_unregister_driver(bfin_jc_driver); + put_tty_driver(bfin_jc_driver); +} +module_exit(bfin_jc_exit); + +#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK) +static void +bfin_jc_straight_buffer_write(const char *buf, unsigned count) +{ + unsigned ate = 0; + while (bfin_read_DBGSTAT() & EMUDOF) + continue; + bfin_write_emudat(count); + while (ate < count) { + while (bfin_read_DBGSTAT() & EMUDOF) + continue; + bfin_write_emudat_chars(buf[ate], buf[ate+1], buf[ate+2], buf[ate+3]); + ate += 4; + } +} +#endif + +#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE +static void +bfin_jc_console_write(struct console *co, const char *buf, unsigned count) +{ + if (bfin_jc_kthread == NULL) + bfin_jc_straight_buffer_write(buf, count); + else + bfin_jc_circ_write(buf, count); +} + +static struct tty_driver * +bfin_jc_console_device(struct console *co, int *index) +{ + *index = co->index; + return bfin_jc_driver; +} + +static struct console bfin_jc_console = { + .name = DEV_NAME, + .write = bfin_jc_console_write, + .device = bfin_jc_console_device, + .flags = CON_ANYTIME | CON_PRINTBUFFER, + .index = -1, +}; + +static int __init bfin_jc_console_init(void) +{ + register_console(&bfin_jc_console); + return 0; +} +console_initcall(bfin_jc_console_init); +#endif + +#ifdef CONFIG_EARLY_PRINTK +static void __init +bfin_jc_early_write(struct console *co, const char *buf, unsigned int count) +{ + bfin_jc_straight_buffer_write(buf, count); +} + +static struct __initdata console bfin_jc_early_console = { + .name = "early_BFJC", + .write = bfin_jc_early_write, + .flags = CON_ANYTIME | CON_PRINTBUFFER, + .index = -1, +}; + +struct console * __init +bfin_jc_early_init(unsigned int port, unsigned int cflag) +{ + return &bfin_jc_early_console; +} +#endif + +MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>"); +MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication"); +MODULE_LICENSE("GPL"); diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 1fdb9f657d8f..f3366d3f06cf 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -604,7 +604,6 @@ #define NR_PORTS 256 -#define ZE_V1_NPORTS 64 #define ZO_V1 0 #define ZO_V2 1 #define ZE_V1 2 @@ -663,18 +662,6 @@ static void cy_throttle(struct tty_struct *tty); static void cy_send_xchar(struct tty_struct *tty, char ch); -#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1) - -#define Z_FPGA_CHECK(card) \ - ((readl(&((struct RUNTIME_9060 __iomem *) \ - ((card).ctl_addr))->init_ctrl) & (1<<17)) != 0) - -#define ISZLOADED(card) (((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \ - ((card).ctl_addr))->mail_box_0)) || \ - Z_FPGA_CHECK(card)) && \ - (ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \ - ((card).base_addr+ID_ADDRESS))->signature))) - #ifndef SERIAL_XMIT_SIZE #define SERIAL_XMIT_SIZE (min(PAGE_SIZE, 4096)) #endif @@ -687,8 +674,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch); #define DRIVER_VERSION 0x02010203 #define RAM_SIZE 0x80000 -#define Z_FPGA_LOADED(X) ((readl(&(X)->init_ctrl) & (1<<17)) != 0) - enum zblock_type { ZBLOCK_PRG = 0, ZBLOCK_FPGA = 1 @@ -883,6 +868,29 @@ static void cyz_rx_restart(unsigned long); static struct timer_list cyz_rx_full_timer[NR_PORTS]; #endif /* CONFIG_CYZ_INTR */ +static inline bool cy_is_Z(struct cyclades_card *card) +{ + return card->num_chips == (unsigned int)-1; +} + +static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr) +{ + return readl(&ctl_addr->init_ctrl) & (1 << 17); +} + +static inline bool cyz_fpga_loaded(struct cyclades_card *card) +{ + return __cyz_fpga_loaded(card->ctl_addr.p9060); +} + +static inline bool cyz_is_loaded(struct cyclades_card *card) +{ + struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS; + + return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) && + readl(&fw_id->signature) == ZFIRM_ID; +} + static inline int serial_paranoia_check(struct cyclades_port *info, char *name, const char *routine) { @@ -1395,19 +1403,15 @@ cyz_fetch_msg(struct cyclades_card *cinfo, unsigned long loc_doorbell; firm_id = cinfo->base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) - return -1; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; - loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *) - (cinfo->ctl_addr))->loc_doorbell); + loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell); if (loc_doorbell) { *cmd = (char)(0xff & loc_doorbell); *channel = readl(&board_ctrl->fwcmd_channel); *param = (__u32) readl(&board_ctrl->fwcmd_param); - cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))-> - loc_doorbell, 0xffffffff); + cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff); return 1; } return 0; @@ -1424,15 +1428,14 @@ cyz_issue_cmd(struct cyclades_card *cinfo, unsigned int index; firm_id = cinfo->base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) + if (!cyz_is_loaded(cinfo)) return -1; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; index = 0; - pci_doorbell = - &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell; + pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell; while ((readl(pci_doorbell) & 0xff) != 0) { if (index++ == 1000) return (int)(readl(pci_doorbell) & 0xff); @@ -1624,10 +1627,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) static struct BOARD_CTRL __iomem *board_ctrl; static struct CH_CTRL __iomem *ch_ctrl; static struct BUF_CTRL __iomem *buf_ctrl; - __u32 channel; + __u32 channel, param, fw_ver; __u8 cmd; - __u32 param; - __u32 hw_ver, fw_ver; int special_count; int delta_count; @@ -1635,8 +1636,6 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; fw_ver = readl(&board_ctrl->fw_version); - hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))-> - mail_box_0); while (cyz_fetch_msg(cinfo, &channel, &cmd, ¶m) == 1) { special_count = 0; @@ -1737,15 +1736,7 @@ static irqreturn_t cyz_interrupt(int irq, void *dev_id) { struct cyclades_card *cinfo = dev_id; - if (unlikely(cinfo == NULL)) { -#ifdef CY_DEBUG_INTERRUPTS - printk(KERN_DEBUG "cyz_interrupt: spurious interrupt %d\n", - irq); -#endif - return IRQ_NONE; /* spurious interrupt */ - } - - if (unlikely(!ISZLOADED(*cinfo))) { + if (unlikely(!cyz_is_loaded(cinfo))) { #ifdef CY_DEBUG_INTERRUPTS printk(KERN_DEBUG "cyz_interrupt: board not yet loaded " "(IRQ%d).\n", irq); @@ -1785,7 +1776,6 @@ static void cyz_poll(unsigned long arg) struct tty_struct *tty; struct FIRM_ID __iomem *firm_id; struct ZFW_CTRL __iomem *zfw_ctrl; - struct BOARD_CTRL __iomem *board_ctrl; struct BUF_CTRL __iomem *buf_ctrl; unsigned long expires = jiffies + HZ; unsigned int port, card; @@ -1793,19 +1783,17 @@ static void cyz_poll(unsigned long arg) for (card = 0; card < NR_CARDS; card++) { cinfo = &cy_card[card]; - if (!IS_CYC_Z(*cinfo)) + if (!cy_is_Z(cinfo)) continue; - if (!ISZLOADED(*cinfo)) + if (!cyz_is_loaded(cinfo)) continue; firm_id = cinfo->base_addr + ID_ADDRESS; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); - board_ctrl = &(zfw_ctrl->board_ctrl); /* Skip first polling cycle to avoid racing conditions with the FW */ if (!cinfo->intr_enabled) { - cinfo->nports = (int)readl(&board_ctrl->n_channel); cinfo->intr_enabled = 1; continue; } @@ -1874,7 +1862,7 @@ static int startup(struct cyclades_port *info) set_line_char(info); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -1931,7 +1919,7 @@ static int startup(struct cyclades_port *info) base_addr = card->base_addr; firm_id = base_addr + ID_ADDRESS; - if (!ISZLOADED(*card)) + if (!cyz_is_loaded(card)) return -ENODEV; zfw_ctrl = card->base_addr + @@ -2026,7 +2014,7 @@ static void start_xmit(struct cyclades_port *info) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -2070,7 +2058,7 @@ static void shutdown(struct cyclades_port *info) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -2126,7 +2114,7 @@ static void shutdown(struct cyclades_port *info) #endif firm_id = base_addr + ID_ADDRESS; - if (!ISZLOADED(*card)) + if (!cyz_is_loaded(card)) return; zfw_ctrl = card->base_addr + @@ -2233,7 +2221,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp, #endif info->port.blocked_open++; - if (!IS_CYC_Z(*cinfo)) { + if (!cy_is_Z(cinfo)) { chip = channel >> 2; channel &= 0x03; index = cinfo->bus_index; @@ -2296,7 +2284,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp, base_addr = cinfo->base_addr; firm_id = base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) { + if (!cyz_is_loaded(cinfo)) { __set_current_state(TASK_RUNNING); remove_wait_queue(&info->port.open_wait, &wait); return -EINVAL; @@ -2397,16 +2385,14 @@ static int cy_open(struct tty_struct *tty, struct file *filp) treat it as absent from the system. This will make the user pay attention. */ - if (IS_CYC_Z(*info->card)) { + if (cy_is_Z(info->card)) { struct cyclades_card *cinfo = info->card; struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) { - if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *) - (cinfo->ctl_addr))->mail_box_0)) && - Z_FPGA_CHECK(*cinfo)) && - (ZFIRM_HLT == readl( - &firm_id->signature))) { + if (!cyz_is_loaded(cinfo)) { + if (cinfo->hw_ver == ZE_V1 && cyz_fpga_loaded(cinfo) && + readl(&firm_id->signature) == + ZFIRM_HLT) { printk(KERN_ERR "cyc:Cyclades-Z Error: you " "need an external power supply for " "this number of ports.\nFirmware " @@ -2423,18 +2409,13 @@ static int cy_open(struct tty_struct *tty, struct file *filp) interrupts should be enabled as soon as the first open happens to one of its ports. */ if (!cinfo->intr_enabled) { - struct ZFW_CTRL __iomem *zfw_ctrl; - struct BOARD_CTRL __iomem *board_ctrl; - - zfw_ctrl = cinfo->base_addr + - (readl(&firm_id->zfwctrl_addr) & - 0xfffff); - - board_ctrl = &zfw_ctrl->board_ctrl; + u16 intr; /* Enable interrupts on the PLX chip */ - cy_writew(cinfo->ctl_addr + 0x68, - readw(cinfo->ctl_addr + 0x68) | 0x0900); + intr = readw(&cinfo->ctl_addr.p9060-> + intr_ctrl_stat) | 0x0900; + cy_writew(&cinfo->ctl_addr.p9060-> + intr_ctrl_stat, intr); /* Enable interrupts on the FW */ retval = cyz_issue_cmd(cinfo, 0, C_CM_IRQ_ENBL, 0L); @@ -2442,8 +2423,6 @@ static int cy_open(struct tty_struct *tty, struct file *filp) printk(KERN_ERR "cyc:IRQ enable retval " "was %x\n", retval); } - cinfo->nports = - (int)readl(&board_ctrl->n_channel); cinfo->intr_enabled = 1; } } @@ -2556,7 +2535,7 @@ static void cy_wait_until_sent(struct tty_struct *tty, int timeout) #endif card = info->card; channel = (info->line) - (card->first_line); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -2601,7 +2580,7 @@ static void cy_flush_buffer(struct tty_struct *tty) info->xmit_cnt = info->xmit_head = info->xmit_tail = 0; spin_unlock_irqrestore(&card->card_lock, flags); - if (IS_CYC_Z(*card)) { /* If it is a Z card, flush the on-board + if (cy_is_Z(card)) { /* If it is a Z card, flush the on-board buffers as well */ spin_lock_irqsave(&card->card_lock, flags); retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L); @@ -2682,7 +2661,7 @@ static void cy_close(struct tty_struct *tty, struct file *filp) spin_lock_irqsave(&card->card_lock, flags); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { int channel = info->line - card->first_line; int index = card->bus_index; void __iomem *base_addr = card->base_addr + @@ -2902,7 +2881,7 @@ static int cy_chars_in_buffer(struct tty_struct *tty) channel = (info->line) - (card->first_line); #ifdef Z_EXT_CHARS_IN_BUFFER - if (!IS_CYC_Z(cy_card[card])) { + if (!cy_is_Z(card)) { #endif /* Z_EXT_CHARS_IN_BUFFER */ #ifdef CY_DEBUG_IO printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n", @@ -2984,7 +2963,6 @@ static void set_line_char(struct cyclades_port *info) void __iomem *base_addr; int chip, channel, index; unsigned cflag, iflag; - unsigned short chip_number; int baud, baud_rate = 0; int i; @@ -3013,9 +2991,8 @@ static void set_line_char(struct cyclades_port *info) card = info->card; channel = info->line - card->first_line; - chip_number = channel / 4; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { index = card->bus_index; @@ -3233,21 +3210,17 @@ static void set_line_char(struct cyclades_port *info) } else { struct FIRM_ID __iomem *firm_id; struct ZFW_CTRL __iomem *zfw_ctrl; - struct BOARD_CTRL __iomem *board_ctrl; struct CH_CTRL __iomem *ch_ctrl; - struct BUF_CTRL __iomem *buf_ctrl; __u32 sw_flow; int retval; firm_id = card->base_addr + ID_ADDRESS; - if (!ISZLOADED(*card)) + if (!cyz_is_loaded(card)) return; zfw_ctrl = card->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); - board_ctrl = &zfw_ctrl->board_ctrl; ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]); - buf_ctrl = &zfw_ctrl->buf_ctrl[channel]; /* baud rate */ baud = tty_get_baud_rate(info->port.tty); @@ -3457,7 +3430,7 @@ static int get_lsr_info(struct cyclades_port *info, unsigned int __user *value) card = info->card; channel = (info->line) - (card->first_line); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3497,7 +3470,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3523,7 +3496,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file) } else { base_addr = card->base_addr; firm_id = card->base_addr + ID_ADDRESS; - if (ISZLOADED(*card)) { + if (cyz_is_loaded(card)) { zfw_ctrl = card->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; @@ -3566,7 +3539,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file, card = info->card; channel = (info->line) - (card->first_line); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3641,7 +3614,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file, base_addr = card->base_addr; firm_id = card->base_addr + ID_ADDRESS; - if (ISZLOADED(*card)) { + if (cyz_is_loaded(card)) { zfw_ctrl = card->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; @@ -3713,7 +3686,7 @@ static int cy_break(struct tty_struct *tty, int break_state) card = info->card; spin_lock_irqsave(&card->card_lock, flags); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { /* Let the transmit ISR take care of this (since it requires stuffing characters into the output stream). */ @@ -3782,7 +3755,7 @@ static int set_threshold(struct cyclades_port *info, unsigned long value) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3810,7 +3783,7 @@ static int get_threshold(struct cyclades_port *info, card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3844,7 +3817,7 @@ static int set_timeout(struct cyclades_port *info, unsigned long value) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3867,7 +3840,7 @@ static int get_timeout(struct cyclades_port *info, card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -4121,7 +4094,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch) card = info->card; channel = info->line - card->first_line; - if (IS_CYC_Z(*card)) { + if (cy_is_Z(card)) { if (ch == STOP_CHAR(tty)) cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L); else if (ch == START_CHAR(tty)) @@ -4154,7 +4127,7 @@ static void cy_throttle(struct tty_struct *tty) card = info->card; if (I_IXOFF(tty)) { - if (!IS_CYC_Z(*card)) + if (!cy_is_Z(card)) cy_send_xchar(tty, STOP_CHAR(tty)); else info->throttle = 1; @@ -4162,7 +4135,7 @@ static void cy_throttle(struct tty_struct *tty) if (tty->termios->c_cflag & CRTSCTS) { channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -4219,7 +4192,7 @@ static void cy_unthrottle(struct tty_struct *tty) if (tty->termios->c_cflag & CRTSCTS) { card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -4263,7 +4236,7 @@ static void cy_stop(struct tty_struct *tty) cinfo = info->card; channel = info->line - cinfo->first_line; - if (!IS_CYC_Z(*cinfo)) { + if (!cy_is_Z(cinfo)) { index = cinfo->bus_index; chip = channel >> 2; channel &= 0x03; @@ -4296,7 +4269,7 @@ static void cy_start(struct tty_struct *tty) cinfo = info->card; channel = info->line - cinfo->first_line; index = cinfo->bus_index; - if (!IS_CYC_Z(*cinfo)) { + if (!cy_is_Z(cinfo)) { chip = channel >> 2; channel &= 0x03; base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index); @@ -4347,33 +4320,20 @@ static void cy_hangup(struct tty_struct *tty) static int __devinit cy_init_card(struct cyclades_card *cinfo) { struct cyclades_port *info; - u32 uninitialized_var(mailbox); - unsigned int nports, port; + unsigned int port; unsigned short chip_number; - int uninitialized_var(index); spin_lock_init(&cinfo->card_lock); + cinfo->intr_enabled = 0; - if (IS_CYC_Z(*cinfo)) { /* Cyclades-Z */ - mailbox = readl(&((struct RUNTIME_9060 __iomem *) - cinfo->ctl_addr)->mail_box_0); - nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8; - cinfo->intr_enabled = 0; - cinfo->nports = 0; /* Will be correctly set later, after - Z FW is loaded */ - } else { - index = cinfo->bus_index; - nports = cinfo->nports = CyPORTS_PER_CHIP * cinfo->num_chips; - } - - cinfo->ports = kzalloc(sizeof(*cinfo->ports) * nports, GFP_KERNEL); + cinfo->ports = kcalloc(cinfo->nports, sizeof(*cinfo->ports), + GFP_KERNEL); if (cinfo->ports == NULL) { printk(KERN_ERR "Cyclades: cannot allocate ports\n"); - cinfo->nports = 0; return -ENOMEM; } - for (port = cinfo->first_line; port < cinfo->first_line + nports; + for (port = cinfo->first_line; port < cinfo->first_line + cinfo->nports; port++) { info = &cinfo->ports[port - cinfo->first_line]; tty_port_init(&info->port); @@ -4387,9 +4347,9 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) init_completion(&info->shutdown_wait); init_waitqueue_head(&info->delta_msr_wait); - if (IS_CYC_Z(*cinfo)) { + if (cy_is_Z(cinfo)) { info->type = PORT_STARTECH; - if (mailbox == ZO_V1) + if (cinfo->hw_ver == ZO_V1) info->xmit_fifo_size = CYZ_FIFO_SIZE; else info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE; @@ -4398,6 +4358,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) cyz_rx_restart, (unsigned long)info); #endif } else { + int index = cinfo->bus_index; info->type = PORT_CIRRUS; info->xmit_fifo_size = CyMAX_CHAR_FIFO; info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS; @@ -4430,7 +4391,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) } #ifndef CONFIG_CYZ_INTR - if (IS_CYC_Z(*cinfo) && !timer_pending(&cyz_timerlist)) { + if (cy_is_Z(cinfo) && !timer_pending(&cyz_timerlist)) { mod_timer(&cyz_timerlist, jiffies + 1); #ifdef CY_PCI_DEBUG printk(KERN_DEBUG "Cyclades-Z polling initialized\n"); @@ -4621,11 +4582,12 @@ static int __init cy_detect_isa(void) /* set cy_card */ cy_card[j].base_addr = cy_isa_address; - cy_card[j].ctl_addr = NULL; + cy_card[j].ctl_addr.p9050 = NULL; cy_card[j].irq = (int)cy_isa_irq; cy_card[j].bus_index = 0; cy_card[j].first_line = cy_next_channel; - cy_card[j].num_chips = cy_isa_nchan / 4; + cy_card[j].num_chips = cy_isa_nchan / CyPORTS_PER_CHIP; + cy_card[j].nports = cy_isa_nchan; if (cy_init_card(&cy_card[j])) { cy_card[j].base_addr = NULL; free_irq(cy_isa_irq, &cy_card[j]); @@ -4781,7 +4743,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, struct CUSTOM_REG __iomem *cust = base_addr; struct ZFW_CTRL __iomem *pt_zfwctrl; void __iomem *tmp; - u32 mailbox, status; + u32 mailbox, status, nchan; unsigned int i; int retval; @@ -4793,7 +4755,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, /* Check whether the firmware is already loaded and running. If positive, skip this board */ - if (Z_FPGA_LOADED(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) { + if (__cyz_fpga_loaded(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) { u32 cntval = readl(base_addr + 0x190); udelay(100); @@ -4812,7 +4774,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, mailbox = readl(&ctl_addr->mail_box_0); - if (mailbox == 0 || Z_FPGA_LOADED(ctl_addr)) { + if (mailbox == 0 || __cyz_fpga_loaded(ctl_addr)) { /* stops CPU and set window to beginning of RAM */ cy_writel(&ctl_addr->loc_addr_base, WIN_CREG); cy_writel(&cust->cpu_stop, 0); @@ -4828,7 +4790,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, base_addr); if (retval) goto err_rel; - if (!Z_FPGA_LOADED(ctl_addr)) { + if (!__cyz_fpga_loaded(ctl_addr)) { dev_err(&pdev->dev, "fw upload successful, but fw is " "not loaded\n"); goto err_rel; @@ -4887,7 +4849,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, "system before loading the new FW to the " "Cyclades-Z.\n"); - if (Z_FPGA_LOADED(ctl_addr)) + if (__cyz_fpga_loaded(ctl_addr)) plx_init(pdev, irq, ctl_addr); retval = -EIO; @@ -4902,16 +4864,16 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr), base_addr + readl(&fid->zfwctrl_addr)); + nchan = readl(&pt_zfwctrl->board_ctrl.n_channel); dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n", - readl(&pt_zfwctrl->board_ctrl.fw_version), - readl(&pt_zfwctrl->board_ctrl.n_channel)); + readl(&pt_zfwctrl->board_ctrl.fw_version), nchan); - if (readl(&pt_zfwctrl->board_ctrl.n_channel) == 0) { + if (nchan == 0) { dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please " "check the connection between the Z host card and the " "serial expanders.\n"); - if (Z_FPGA_LOADED(ctl_addr)) + if (__cyz_fpga_loaded(ctl_addr)) plx_init(pdev, irq, ctl_addr); dev_info(&pdev->dev, "Null number of ports detected. Board " @@ -4932,9 +4894,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) | 0x00030800UL); - plx_init(pdev, irq, ctl_addr); - - return 0; + return nchan; err_rel: release_firmware(fw); err: @@ -4946,7 +4906,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, { void __iomem *addr0 = NULL, *addr2 = NULL; char *card_name = NULL; - u32 mailbox; + u32 uninitialized_var(mailbox); unsigned int device_id, nchan = 0, card_no, i; unsigned char plx_ver; int retval, irq; @@ -5023,11 +4983,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, } /* Disable interrupts on the PLX before resetting it */ - cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900); + cy_writew(&ctl_addr->intr_ctrl_stat, + readw(&ctl_addr->intr_ctrl_stat) & ~0x0900); plx_init(pdev, irq, addr0); - mailbox = (u32)readl(&ctl_addr->mail_box_0); + mailbox = readl(&ctl_addr->mail_box_0); addr2 = ioremap_nocache(pci_resource_start(pdev, 2), mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin); @@ -5038,12 +4999,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, if (mailbox == ZE_V1) { card_name = "Cyclades-Ze"; - - readl(&ctl_addr->mail_box_0); - nchan = ZE_V1_NPORTS; } else { card_name = "Cyclades-8Zo"; - #ifdef CY_PCI_DEBUG if (mailbox == ZO_V1) { cy_writel(&ctl_addr->loc_addr_base, WIN_CREG); @@ -5065,15 +5022,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, */ if ((mailbox == ZO_V1) || (mailbox == ZO_V2)) cy_writel(addr2 + ID_ADDRESS, 0L); - - retval = cyz_load_fw(pdev, addr2, addr0, irq); - if (retval) - goto err_unmap; - /* This must be a Cyclades-8Zo/PCI. The extendable - version will have a different device_id and will - be allocated its maximum number of ports. */ - nchan = 8; } + + retval = cyz_load_fw(pdev, addr2, addr0, irq); + if (retval <= 0) + goto err_unmap; + nchan = retval; } if ((cy_next_channel + nchan) > NR_PORTS) { @@ -5103,8 +5057,10 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "could not allocate IRQ\n"); goto err_unmap; } - cy_card[card_no].num_chips = nchan / 4; + cy_card[card_no].num_chips = nchan / CyPORTS_PER_CHIP; } else { + cy_card[card_no].hw_ver = mailbox; + cy_card[card_no].num_chips = (unsigned int)-1; #ifdef CONFIG_CYZ_INTR /* allocate IRQ only if board has an IRQ */ if (irq != 0 && irq != 255) { @@ -5117,15 +5073,15 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, } } #endif /* CONFIG_CYZ_INTR */ - cy_card[card_no].num_chips = (unsigned int)-1; } /* set cy_card */ cy_card[card_no].base_addr = addr2; - cy_card[card_no].ctl_addr = addr0; + cy_card[card_no].ctl_addr.p9050 = addr0; cy_card[card_no].irq = irq; cy_card[card_no].bus_index = 1; cy_card[card_no].first_line = cy_next_channel; + cy_card[card_no].nports = nchan; retval = cy_init_card(&cy_card[card_no]); if (retval) goto err_null; @@ -5138,17 +5094,20 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, plx_ver = readb(addr2 + CyPLX_VER) & 0x0f; switch (plx_ver) { case PLX_9050: - cy_writeb(addr0 + 0x4c, 0x43); break; case PLX_9060: case PLX_9080: default: /* Old boards, use PLX_9060 */ - plx_init(pdev, irq, addr0); - cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900); + { + struct RUNTIME_9060 __iomem *ctl_addr = addr0; + plx_init(pdev, irq, ctl_addr); + cy_writew(&ctl_addr->intr_ctrl_stat, + readw(&ctl_addr->intr_ctrl_stat) | 0x0900); break; } + } } dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from " @@ -5179,22 +5138,23 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev) unsigned int i; /* non-Z with old PLX */ - if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) == + if (!cy_is_Z(cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) == PLX_9050) - cy_writeb(cinfo->ctl_addr + 0x4c, 0); + cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0); else #ifndef CONFIG_CYZ_INTR - if (!IS_CYC_Z(*cinfo)) + if (!cy_is_Z(cinfo)) #endif - cy_writew(cinfo->ctl_addr + 0x68, - readw(cinfo->ctl_addr + 0x68) & ~0x0900); + cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat, + readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) & + ~0x0900); iounmap(cinfo->base_addr); - if (cinfo->ctl_addr) - iounmap(cinfo->ctl_addr); + if (cinfo->ctl_addr.p9050) + iounmap(cinfo->ctl_addr.p9050); if (cinfo->irq #ifndef CONFIG_CYZ_INTR - && !IS_CYC_Z(*cinfo) + && !cy_is_Z(cinfo) #endif /* CONFIG_CYZ_INTR */ ) free_irq(cinfo->irq, cinfo); @@ -5240,7 +5200,7 @@ static int cyclades_proc_show(struct seq_file *m, void *v) (cur_jifs - info->idle_stats.recv_idle)/ HZ, info->idle_stats.overruns, /* FIXME: double check locking */ - (long)info->port.tty->ldisc.ops->num); + (long)info->port.tty->ldisc->ops->num); else seq_printf(m, "%3d %8lu %10lu %8lu " "%10lu %8lu %9lu %6ld\n", @@ -5386,11 +5346,11 @@ static void __exit cy_cleanup_module(void) /* clear interrupt */ cy_writeb(card->base_addr + Cy_ClrIntr, 0); iounmap(card->base_addr); - if (card->ctl_addr) - iounmap(card->ctl_addr); + if (card->ctl_addr.p9050) + iounmap(card->ctl_addr.p9050); if (card->irq #ifndef CONFIG_CYZ_INTR - && !IS_CYC_Z(*card) + && !cy_is_Z(card) #endif /* CONFIG_CYZ_INTR */ ) free_irq(card->irq, card); diff --git a/drivers/char/epca.c b/drivers/char/epca.c index af7c13ca9493..abef1f7d84fe 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -745,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port) return 0; } -static void epca_raise_dtr_rts(struct tty_port *port) +static void epca_dtr_rts(struct tty_port *port, int onoff) { } @@ -925,7 +925,7 @@ static const struct tty_operations pc_ops = { static const struct tty_port_operations epca_port_ops = { .carrier_raised = epca_carrier_raised, - .raise_dtr_rts = epca_raise_dtr_rts, + .dtr_rts = epca_dtr_rts, }; static int info_open(struct tty_struct *tty, struct file *filp) @@ -1518,7 +1518,7 @@ static void doevent(int crd) if (event & MODEMCHG_IND) { /* A modem signal change has been indicated */ ch->imodem = mstat; - if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) { + if (test_bit(ASYNCB_CHECK_CD, &ch->port.flags)) { /* We are now receiving dcd */ if (mstat & ch->dcd) wake_up_interruptible(&ch->port.open_wait); @@ -1765,9 +1765,9 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch) * that the driver will wait on carrier detect. */ if (ts->c_cflag & CLOCAL) - clear_bit(ASYNC_CHECK_CD, &ch->port.flags); + clear_bit(ASYNCB_CHECK_CD, &ch->port.flags); else - set_bit(ASYNC_CHECK_CD, &ch->port.flags); + set_bit(ASYNCB_CHECK_CD, &ch->port.flags); mval = ch->m_dtr | ch->m_rts; } /* End CBAUD not detected */ iflag = termios2digi_i(ch, ts->c_iflag); @@ -2114,8 +2114,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file, tty_wait_until_sent(tty, 0); } else { /* ldisc lock already held in ioctl */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); } unlock_kernel(); /* Fall Thru */ @@ -2244,7 +2244,8 @@ static void do_softint(struct work_struct *work) if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) { tty_hangup(tty); wake_up_interruptible(&ch->port.open_wait); - clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags); + clear_bit(ASYNCB_NORMAL_ACTIVE, + &ch->port.flags); } } tty_kref_put(tty); diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c index 0061e18aff60..0d10b89218ed 100644 --- a/drivers/char/ip2/i2lib.c +++ b/drivers/char/ip2/i2lib.c @@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh) amountToMove = count; } // Move the first block - pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, + pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY, &(pCh->Ibuf[stripIndex]), NULL, amountToMove ); // If we needed to wrap, do the second data move if (count > amountToMove) { - pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, + pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY, pCh->Ibuf, NULL, count - amountToMove ); } // Bump and wrap the stripIndex all at once by the amount of data read. This diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index afd9247cf082..517271c762e6 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -1315,8 +1315,8 @@ static inline void isig(int sig, struct tty_struct *tty, int flush) if (tty->pgrp) kill_pgrp(tty->pgrp, sig, 1); if (flush || !L_NOFLSH(tty)) { - if ( tty->ldisc.ops->flush_buffer ) - tty->ldisc.ops->flush_buffer(tty); + if ( tty->ldisc->ops->flush_buffer ) + tty->ldisc->ops->flush_buffer(tty); i2InputFlush( tty->driver_data ); } } diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index a59eac584d16..4d745a89504f 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -329,7 +329,7 @@ static inline void drop_rts(struct isi_port *port) /* card->lock MUST NOT be held */ -static void isicom_raise_dtr_rts(struct tty_port *port) +static void isicom_dtr_rts(struct tty_port *port, int on) { struct isi_port *ip = container_of(port, struct isi_port, port); struct isi_board *card = ip->card; @@ -339,10 +339,17 @@ static void isicom_raise_dtr_rts(struct tty_port *port) if (!lock_card(card)) return; - outw(0x8000 | (channel << card->shift_count) | 0x02, base); - outw(0x0f04, base); - InterruptTheCard(base); - ip->status |= (ISI_DTR | ISI_RTS); + if (on) { + outw(0x8000 | (channel << card->shift_count) | 0x02, base); + outw(0x0f04, base); + InterruptTheCard(base); + ip->status |= (ISI_DTR | ISI_RTS); + } else { + outw(0x8000 | (channel << card->shift_count) | 0x02, base); + outw(0x0C04, base); + InterruptTheCard(base); + ip->status &= ~(ISI_DTR | ISI_RTS); + } unlock_card(card); } @@ -1339,7 +1346,7 @@ static const struct tty_operations isicom_ops = { static const struct tty_port_operations isicom_port_ops = { .carrier_raised = isicom_carrier_raised, - .raise_dtr_rts = isicom_raise_dtr_rts, + .dtr_rts = isicom_dtr_rts, }; static int __devinit reset_card(struct pci_dev *pdev, diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index fff19f7e29d2..e18800c400b1 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -1140,14 +1140,14 @@ static int stli_carrier_raised(struct tty_port *port) return (portp->sigs & TIOCM_CD) ? 1 : 0; } -static void stli_raise_dtr_rts(struct tty_port *port) +static void stli_dtr_rts(struct tty_port *port, int on) { struct stliport *portp = container_of(port, struct stliport, port); struct stlibrd *brdp = stli_brds[portp->brdnr]; - stli_mkasysigs(&portp->asig, 1, 1); + stli_mkasysigs(&portp->asig, on, on); if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig, sizeof(asysigs_t), 0) < 0) - printk(KERN_WARNING "istallion: dtr raise failed.\n"); + printk(KERN_WARNING "istallion: dtr set failed.\n"); } @@ -4417,7 +4417,7 @@ static const struct tty_operations stli_ops = { static const struct tty_port_operations stli_port_ops = { .carrier_raised = stli_carrier_raised, - .raise_dtr_rts = stli_raise_dtr_rts, + .dtr_rts = stli_dtr_rts, }; /*****************************************************************************/ diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 4a4cab73d0be..65b6ff2442c6 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -1184,6 +1184,11 @@ static int moxa_open(struct tty_struct *tty, struct file *filp) return -ENODEV; } + if (port % MAX_PORTS_PER_BOARD >= brd->numPorts) { + mutex_unlock(&moxa_openlock); + return -ENODEV; + } + ch = &brd->ports[port % MAX_PORTS_PER_BOARD]; ch->port.count++; tty->driver_data = ch; diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 13f8871e5b21..9533f43a30bb 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -547,14 +547,18 @@ static int mxser_carrier_raised(struct tty_port *port) return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0; } -static void mxser_raise_dtr_rts(struct tty_port *port) +static void mxser_dtr_rts(struct tty_port *port, int on) { struct mxser_port *mp = container_of(port, struct mxser_port, port); unsigned long flags; spin_lock_irqsave(&mp->slock, flags); - outb(inb(mp->ioaddr + UART_MCR) | - UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); + if (on) + outb(inb(mp->ioaddr + UART_MCR) | + UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); + else + outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS), + mp->ioaddr + UART_MCR); spin_unlock_irqrestore(&mp->slock, flags); } @@ -2356,7 +2360,7 @@ static const struct tty_operations mxser_ops = { struct tty_port_operations mxser_port_ops = { .carrier_raised = mxser_carrier_raised, - .raise_dtr_rts = mxser_raise_dtr_rts, + .dtr_rts = mxser_dtr_rts, }; /* diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index bacb3e2872ae..461ece591a5b 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty) #endif /* Flush any pending characters in the driver and discipline. */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index f6f0e4ec2b51..94a5d5020abc 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -73,24 +73,6 @@ #define ECHO_OP_SET_CANON_COL 0x81 #define ECHO_OP_ERASE_TAB 0x82 -static inline unsigned char *alloc_buf(void) -{ - gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; - - if (PAGE_SIZE != N_TTY_BUF_SIZE) - return kmalloc(N_TTY_BUF_SIZE, prio); - else - return (unsigned char *)__get_free_page(prio); -} - -static inline void free_buf(unsigned char *buf) -{ - if (PAGE_SIZE != N_TTY_BUF_SIZE) - kfree(buf); - else - free_page((unsigned long) buf); -} - static inline int tty_put_user(struct tty_struct *tty, unsigned char x, unsigned char __user *ptr) { @@ -1558,11 +1540,11 @@ static void n_tty_close(struct tty_struct *tty) { n_tty_flush_buffer(tty); if (tty->read_buf) { - free_buf(tty->read_buf); + kfree(tty->read_buf); tty->read_buf = NULL; } if (tty->echo_buf) { - free_buf(tty->echo_buf); + kfree(tty->echo_buf); tty->echo_buf = NULL; } } @@ -1584,17 +1566,16 @@ static int n_tty_open(struct tty_struct *tty) /* These are ugly. Currently a malloc failure here can panic */ if (!tty->read_buf) { - tty->read_buf = alloc_buf(); + tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL); if (!tty->read_buf) return -ENOMEM; } if (!tty->echo_buf) { - tty->echo_buf = alloc_buf(); + tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL); + if (!tty->echo_buf) return -ENOMEM; } - memset(tty->read_buf, 0, N_TTY_BUF_SIZE); - memset(tty->echo_buf, 0, N_TTY_BUF_SIZE); reset_buffer_flags(tty); tty->column = 0; n_tty_set_termios(tty, NULL); diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 19d79fc54461..77b364889224 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -383,7 +383,7 @@ static void async_mode(MGSLPC_INFO *info); static void tx_timeout(unsigned long context); static int carrier_raised(struct tty_port *port); -static void raise_dtr_rts(struct tty_port *port); +static void dtr_rts(struct tty_port *port, int onoff); #if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) @@ -513,7 +513,7 @@ static void ldisc_receive_buf(struct tty_struct *tty, static const struct tty_port_operations mgslpc_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts + .dtr_rts = dtr_rts }; static int mgslpc_probe(struct pcmcia_device *link) @@ -2528,13 +2528,16 @@ static int carrier_raised(struct tty_port *port) return 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int onoff) { MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port); unsigned long flags; spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (onoff) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR; set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 31038a0052a2..5acd29e6e043 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -30,7 +30,6 @@ #include <asm/system.h> -/* These are global because they are accessed in tty_io.c */ #ifdef CONFIG_UNIX98_PTYS static struct tty_driver *ptm_driver; static struct tty_driver *pts_driver; @@ -111,7 +110,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf, c = to->receive_room; if (c > count) c = count; - to->ldisc.ops->receive_buf(to, buf, NULL, c); + to->ldisc->ops->receive_buf(to, buf, NULL, c); return c; } @@ -149,11 +148,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty) int count; /* We should get the line discipline lock for "tty->link" */ - if (!to || !to->ldisc.ops->chars_in_buffer) + if (!to || !to->ldisc->ops->chars_in_buffer) return 0; /* The ldisc must report 0 if no characters available to be read */ - count = to->ldisc.ops->chars_in_buffer(to); + count = to->ldisc->ops->chars_in_buffer(to); if (tty->driver->subtype == PTY_TYPE_SLAVE) return count; @@ -187,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty) if (!to) return; - if (to->ldisc.ops->flush_buffer) - to->ldisc.ops->flush_buffer(to); + if (to->ldisc->ops->flush_buffer) + to->ldisc->ops->flush_buffer(to); if (to->packet) { spin_lock_irqsave(&tty->ctrl_lock, flags); diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index f59fc5cea067..63d5b628477a 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -872,11 +872,16 @@ static int carrier_raised(struct tty_port *port) return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { struct r_port *info = container_of(port, struct r_port, port); - sSetDTR(&info->channel); - sSetRTS(&info->channel); + if (on) { + sSetDTR(&info->channel); + sSetRTS(&info->channel); + } else { + sClrDTR(&info->channel); + sClrRTS(&info->channel); + } } /* @@ -934,7 +939,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp) /* * Info->count is now 1; so it's safe to sleep now. */ - if (!test_bit(ASYNC_INITIALIZED, &port->flags)) { + if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) { cp = &info->channel; sSetRxTrigger(cp, TRIG_1); if (sGetChanStatus(cp) & CD_ACT) @@ -958,7 +963,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp) sEnRxFIFO(cp); sEnTransmit(cp); - set_bit(ASYNC_INITIALIZED, &info->port.flags); + set_bit(ASYNCB_INITIALIZED, &info->port.flags); /* * Set up the tty->alt_speed kludge @@ -1641,7 +1646,7 @@ static int rp_write(struct tty_struct *tty, /* Write remaining data into the port's xmit_buf */ while (1) { /* Hung up ? */ - if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags)) + if (!test_bit(ASYNCB_NORMAL_ACTIVE, &info->port.flags)) goto end; c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1); c = min(c, XMIT_BUF_SIZE - info->xmit_head); @@ -2250,7 +2255,7 @@ static const struct tty_operations rocket_ops = { static const struct tty_port_operations rocket_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* diff --git a/drivers/char/selection.c b/drivers/char/selection.c index cb8ca5698963..f97b9e848064 100644 --- a/drivers/char/selection.c +++ b/drivers/char/selection.c @@ -327,7 +327,7 @@ int paste_selection(struct tty_struct *tty) } count = sel_buffer_lth - pasted; count = min(count, tty->receive_room); - tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted, + tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 2ad813a801dc..53e504f41b20 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -772,11 +772,11 @@ static int stl_carrier_raised(struct tty_port *port) return (portp->sigs & TIOCM_CD) ? 1 : 0; } -static void stl_raise_dtr_rts(struct tty_port *port) +static void stl_dtr_rts(struct tty_port *port, int on) { struct stlport *portp = container_of(port, struct stlport, port); /* Takes brd_lock internally */ - stl_setsignals(portp, 1, 1); + stl_setsignals(portp, on, on); } /*****************************************************************************/ @@ -2547,7 +2547,7 @@ static const struct tty_operations stl_ops = { static const struct tty_port_operations stl_port_ops = { .carrier_raised = stl_carrier_raised, - .raise_dtr_rts = stl_raise_dtr_rts, + .dtr_rts = stl_dtr_rts, }; /*****************************************************************************/ diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index afd0b26ca056..afded3a2379c 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -3247,13 +3247,16 @@ static int carrier_raised(struct tty_port *port) return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { struct mgsl_struct *info = container_of(port, struct mgsl_struct, port); unsigned long flags; spin_lock_irqsave(&info->irq_spinlock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR); usc_set_serial_signals(info); spin_unlock_irqrestore(&info->irq_spinlock,flags); } @@ -4258,7 +4261,7 @@ static void mgsl_add_device( struct mgsl_struct *info ) static const struct tty_port_operations mgsl_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 5e256494686a..1386625fc4ca 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -214,6 +214,7 @@ struct slgt_desc #define set_desc_next(a,b) (a).next = cpu_to_le32((unsigned int)(b)) #define set_desc_count(a,b)(a).count = cpu_to_le16((unsigned short)(b)) #define set_desc_eof(a,b) (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0)) +#define set_desc_status(a, b) (a).status = cpu_to_le16((unsigned short)(b)) #define desc_count(a) (le16_to_cpu((a).count)) #define desc_status(a) (le16_to_cpu((a).status)) #define desc_complete(a) (le16_to_cpu((a).status) & BIT15) @@ -297,6 +298,7 @@ struct slgt_info { u32 max_frame_size; /* as set by device config */ unsigned int rbuf_fill_level; + unsigned int rx_pio; unsigned int if_mode; unsigned int base_clock; @@ -331,6 +333,8 @@ struct slgt_info { struct slgt_desc *rbufs; unsigned int rbuf_current; unsigned int rbuf_index; + unsigned int rbuf_fill_index; + unsigned short rbuf_fill_count; unsigned int tbuf_count; struct slgt_desc *tbufs; @@ -2110,6 +2114,40 @@ static void ri_change(struct slgt_info *info, unsigned short status) info->pending_bh |= BH_STATUS; } +static void isr_rxdata(struct slgt_info *info) +{ + unsigned int count = info->rbuf_fill_count; + unsigned int i = info->rbuf_fill_index; + unsigned short reg; + + while (rd_reg16(info, SSR) & IRQ_RXDATA) { + reg = rd_reg16(info, RDR); + DBGISR(("isr_rxdata %s RDR=%04X\n", info->device_name, reg)); + if (desc_complete(info->rbufs[i])) { + /* all buffers full */ + rx_stop(info); + info->rx_restart = 1; + continue; + } + info->rbufs[i].buf[count++] = (unsigned char)reg; + /* async mode saves status byte to buffer for each data byte */ + if (info->params.mode == MGSL_MODE_ASYNC) + info->rbufs[i].buf[count++] = (unsigned char)(reg >> 8); + if (count == info->rbuf_fill_level || (reg & BIT10)) { + /* buffer full or end of frame */ + set_desc_count(info->rbufs[i], count); + set_desc_status(info->rbufs[i], BIT15 | (reg >> 8)); + info->rbuf_fill_count = count = 0; + if (++i == info->rbuf_count) + i = 0; + info->pending_bh |= BH_RECEIVE; + } + } + + info->rbuf_fill_index = i; + info->rbuf_fill_count = count; +} + static void isr_serial(struct slgt_info *info) { unsigned short status = rd_reg16(info, SSR); @@ -2125,6 +2163,8 @@ static void isr_serial(struct slgt_info *info) if (info->tx_count) isr_txeom(info, status); } + if (info->rx_pio && (status & IRQ_RXDATA)) + isr_rxdata(info); if ((status & IRQ_RXBREAK) && (status & RXBREAK)) { info->icount.brk++; /* process break detection if tty control allows */ @@ -2141,7 +2181,8 @@ static void isr_serial(struct slgt_info *info) } else { if (status & (IRQ_TXIDLE + IRQ_TXUNDER)) isr_txeom(info, status); - + if (info->rx_pio && (status & IRQ_RXDATA)) + isr_rxdata(info); if (status & IRQ_RXIDLE) { if (status & RXIDLE) info->icount.rxidle++; @@ -2642,6 +2683,10 @@ static int rx_enable(struct slgt_info *info, int enable) return -EINVAL; } info->rbuf_fill_level = rbuf_fill_level; + if (rbuf_fill_level < 128) + info->rx_pio = 1; /* PIO mode */ + else + info->rx_pio = 0; /* DMA mode */ rx_stop(info); /* restart receiver to use new fill level */ } @@ -3099,13 +3144,16 @@ static int carrier_raised(struct tty_port *port) return (info->signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { unsigned long flags; struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - info->signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR); set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -3419,7 +3467,7 @@ static void add_device(struct slgt_info *info) static const struct tty_port_operations slgt_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* @@ -3841,15 +3889,27 @@ static void rx_start(struct slgt_info *info) rdma_reset(info); reset_rbufs(info); - /* set 1st descriptor address */ - wr_reg32(info, RDDAR, info->rbufs[0].pdesc); - - if (info->params.mode != MGSL_MODE_ASYNC) { - /* enable rx DMA and DMA interrupt */ - wr_reg32(info, RDCSR, (BIT2 + BIT0)); + if (info->rx_pio) { + /* rx request when rx FIFO not empty */ + wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) & ~BIT14)); + slgt_irq_on(info, IRQ_RXDATA); + if (info->params.mode == MGSL_MODE_ASYNC) { + /* enable saving of rx status */ + wr_reg32(info, RDCSR, BIT6); + } } else { - /* enable saving of rx status, rx DMA and DMA interrupt */ - wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0)); + /* rx request when rx FIFO half full */ + wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT14)); + /* set 1st descriptor address */ + wr_reg32(info, RDDAR, info->rbufs[0].pdesc); + + if (info->params.mode != MGSL_MODE_ASYNC) { + /* enable rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT2 + BIT0)); + } else { + /* enable saving of rx status, rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0)); + } } slgt_irq_on(info, IRQ_RXOVER); @@ -4467,6 +4527,8 @@ static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last static void reset_rbufs(struct slgt_info *info) { free_rbufs(info, 0, info->rbuf_count - 1); + info->rbuf_fill_index = 0; + info->rbuf_fill_count = 0; } /* diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 26de60efe4b2..6f727e3c53ad 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -3277,13 +3277,16 @@ static int carrier_raised(struct tty_port *port) return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { SLMP_INFO *info = container_of(port, SLMP_INFO, port); unsigned long flags; spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR); set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -3746,7 +3749,7 @@ static void add_device(SLMP_INFO *info) static const struct tty_port_operations port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* Allocate and initialize a device instance structure diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 55ba6f142883..ac16fbec72d0 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -29,10 +29,7 @@ static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor, buf = kmalloc(sizeof(*buf), GFP_KERNEL); if (!buf) goto err; - if (PAGE_SIZE != N_TTY_BUF_SIZE) - buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); - else - buf->data = (unsigned char *)__get_free_page(GFP_KERNEL); + buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); if (!buf->data) goto err_buf; atomic_set(&buf->count, 1); @@ -52,10 +49,7 @@ err: static void tty_audit_buf_free(struct tty_audit_buf *buf) { WARN_ON(buf->valid != 0); - if (PAGE_SIZE != N_TTY_BUF_SIZE) - kfree(buf->data); - else - free_page((unsigned long)buf->data); + kfree(buf->data); kfree(buf); } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 66b99a2049e3..939e198d7670 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -295,7 +295,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) struct tty_driver *p, *res = NULL; int tty_line = 0; int len; - char *str; + char *str, *stp; for (str = name; *str; str++) if ((*str >= '0' && *str <= '9') || *str == ',') @@ -311,13 +311,14 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) list_for_each_entry(p, &tty_drivers, tty_drivers) { if (strncmp(name, p->name, len) != 0) continue; - if (*str == ',') - str++; - if (*str == '\0') - str = NULL; + stp = str; + if (*stp == ',') + stp++; + if (*stp == '\0') + stp = NULL; if (tty_line >= 0 && tty_line <= p->num && p->ops && - p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) { + p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) { res = tty_driver_kref_get(p); *line = tty_line; break; @@ -470,43 +471,6 @@ void tty_wakeup(struct tty_struct *tty) EXPORT_SYMBOL_GPL(tty_wakeup); /** - * tty_ldisc_flush - flush line discipline queue - * @tty: tty - * - * Flush the line discipline queue (if any) for this tty. If there - * is no line discipline active this is a no-op. - */ - -void tty_ldisc_flush(struct tty_struct *tty) -{ - struct tty_ldisc *ld = tty_ldisc_ref(tty); - if (ld) { - if (ld->ops->flush_buffer) - ld->ops->flush_buffer(tty); - tty_ldisc_deref(ld); - } - tty_buffer_flush(tty); -} - -EXPORT_SYMBOL_GPL(tty_ldisc_flush); - -/** - * tty_reset_termios - reset terminal state - * @tty: tty to reset - * - * Restore a terminal to the driver default state - */ - -static void tty_reset_termios(struct tty_struct *tty) -{ - mutex_lock(&tty->termios_mutex); - *tty->termios = tty->driver->init_termios; - tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios); - tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios); - mutex_unlock(&tty->termios_mutex); -} - -/** * do_tty_hangup - actual handler for hangup events * @work: tty device * @@ -535,7 +499,6 @@ static void do_tty_hangup(struct work_struct *work) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; - struct tty_ldisc *ld; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -566,40 +529,8 @@ static void do_tty_hangup(struct work_struct *work) filp->f_op = &hung_up_tty_fops; } file_list_unlock(); - /* - * FIXME! What are the locking issues here? This may me overdoing - * things... This question is especially important now that we've - * removed the irqlock. - */ - ld = tty_ldisc_ref(tty); - if (ld != NULL) { - /* We may have no line discipline at this point */ - if (ld->ops->flush_buffer) - ld->ops->flush_buffer(tty); - tty_driver_flush_buffer(tty); - if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && - ld->ops->write_wakeup) - ld->ops->write_wakeup(tty); - if (ld->ops->hangup) - ld->ops->hangup(tty); - } - /* - * FIXME: Once we trust the LDISC code better we can wait here for - * ldisc completion and fix the driver call race - */ - wake_up_interruptible_poll(&tty->write_wait, POLLOUT); - wake_up_interruptible_poll(&tty->read_wait, POLLIN); - /* - * Shutdown the current line discipline, and reset it to - * N_TTY. - */ - if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) - tty_reset_termios(tty); - /* Defer ldisc switch */ - /* tty_deferred_ldisc_switch(N_TTY); - This should get done automatically when the port closes and - tty_release is called */ + tty_ldisc_hangup(tty); read_lock(&tasklist_lock); if (tty->session) { @@ -628,12 +559,15 @@ static void do_tty_hangup(struct work_struct *work) read_unlock(&tasklist_lock); spin_lock_irqsave(&tty->ctrl_lock, flags); - tty->flags = 0; + clear_bit(TTY_THROTTLED, &tty->flags); + clear_bit(TTY_PUSH, &tty->flags); + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); put_pid(tty->session); put_pid(tty->pgrp); tty->session = NULL; tty->pgrp = NULL; tty->ctrl_status = 0; + set_bit(TTY_HUPPED, &tty->flags); spin_unlock_irqrestore(&tty->ctrl_lock, flags); /* Account for the p->signal references we killed */ @@ -659,10 +593,7 @@ static void do_tty_hangup(struct work_struct *work) * can't yet guarantee all that. */ set_bit(TTY_HUPPED, &tty->flags); - if (ld) { - tty_ldisc_enable(tty); - tty_ldisc_deref(ld); - } + tty_ldisc_enable(tty); unlock_kernel(); if (f) fput(f); @@ -2480,6 +2411,24 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int return tty->ops->tiocmset(tty, file, set, clear); } +struct tty_struct *tty_pair_get_tty(struct tty_struct *tty) +{ + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER) + tty = tty->link; + return tty; +} +EXPORT_SYMBOL(tty_pair_get_tty); + +struct tty_struct *tty_pair_get_pty(struct tty_struct *tty) +{ + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER) + return tty; + return tty->link; +} +EXPORT_SYMBOL(tty_pair_get_pty); + /* * Split this up, as gcc can choke on it otherwise.. */ @@ -2495,11 +2444,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; - real_tty = tty; - if (tty->driver->type == TTY_DRIVER_TYPE_PTY && - tty->driver->subtype == PTY_TYPE_MASTER) - real_tty = tty->link; - + real_tty = tty_pair_get_tty(tty); /* * Factor out some common prep work @@ -2555,7 +2500,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCGSID: return tiocgsid(tty, real_tty, p); case TIOCGETD: - return put_user(tty->ldisc.ops->num, (int __user *)p); + return put_user(tty->ldisc->ops->num, (int __user *)p); case TIOCSETD: return tiocsetd(tty, p); /* @@ -2770,6 +2715,7 @@ void initialize_tty_struct(struct tty_struct *tty, tty->buf.head = tty->buf.tail = NULL; tty_buffer_init(tty); mutex_init(&tty->termios_mutex); + mutex_init(&tty->ldisc_mutex); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); INIT_WORK(&tty->hangup_work, do_tty_hangup); diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index 6f4c7d0a53bf..8116bb1c8f80 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -97,14 +97,19 @@ EXPORT_SYMBOL(tty_driver_flush_buffer); * @tty: terminal * * Indicate that a tty should stop transmitting data down the stack. + * Takes the termios mutex to protect against parallel throttle/unthrottle + * and also to ensure the driver can consistently reference its own + * termios data at this point when implementing software flow control. */ void tty_throttle(struct tty_struct *tty) { + mutex_lock(&tty->termios_mutex); /* check TTY_THROTTLED first so it indicates our state */ if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && tty->ops->throttle) tty->ops->throttle(tty); + mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_throttle); @@ -113,13 +118,21 @@ EXPORT_SYMBOL(tty_throttle); * @tty: terminal * * Indicate that a tty may continue transmitting data down the stack. + * Takes the termios mutex to protect against parallel throttle/unthrottle + * and also to ensure the driver can consistently reference its own + * termios data at this point when implementing software flow control. + * + * Drivers should however remember that the stack can issue a throttle, + * then change flow control method, then unthrottle. */ void tty_unthrottle(struct tty_struct *tty) { + mutex_lock(&tty->termios_mutex); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && tty->ops->unthrottle) tty->ops->unthrottle(tty); + mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_unthrottle); @@ -613,9 +626,25 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt) return 0; } +static void copy_termios(struct tty_struct *tty, struct ktermios *kterm) +{ + mutex_lock(&tty->termios_mutex); + memcpy(kterm, tty->termios, sizeof(struct ktermios)); + mutex_unlock(&tty->termios_mutex); +} + +static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm) +{ + mutex_lock(&tty->termios_mutex); + memcpy(kterm, tty->termios_locked, sizeof(struct ktermios)); + mutex_unlock(&tty->termios_mutex); +} + static int get_termio(struct tty_struct *tty, struct termio __user *termio) { - if (kernel_termios_to_user_termio(termio, tty->termios)) + struct ktermios kterm; + copy_termios(tty, &kterm); + if (kernel_termios_to_user_termio(termio, &kterm)) return -EFAULT; return 0; } @@ -917,6 +946,8 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, struct tty_struct *real_tty; void __user *p = (void __user *)arg; int ret = 0; + struct ktermios kterm; + struct termiox ktermx; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -952,23 +983,20 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, return set_termios(real_tty, p, TERMIOS_OLD); #ifndef TCGETS2 case TCGETS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios)) + copy_termios(real_tty, &kterm); + if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; #else case TCGETS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios)) + copy_termios(real_tty, &kterm); + if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TCGETS2: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios((struct termios2 __user *)arg, real_tty->termios)) + copy_termios(real_tty, &kterm); + if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TCSETSF2: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT); @@ -987,34 +1015,36 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, return set_termios(real_tty, p, TERMIOS_TERMIO); #ifndef TCGETS2 case TIOCGLCKTRMIOS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked)) + copy_termios_locked(real_tty, &kterm); + if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TIOCSLCKTRMIOS: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - mutex_lock(&real_tty->termios_mutex); - if (user_termios_to_kernel_termios(real_tty->termios_locked, + copy_termios_locked(real_tty, &kterm); + if (user_termios_to_kernel_termios(&kterm, (struct termios __user *) arg)) - ret = -EFAULT; + return -EFAULT; + mutex_lock(&real_tty->termios_mutex); + memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios)); mutex_unlock(&real_tty->termios_mutex); - return ret; + return 0; #else case TIOCGLCKTRMIOS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked)) + copy_termios_locked(real_tty, &kterm); + if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TIOCSLCKTRMIOS: if (!capable(CAP_SYS_ADMIN)) - ret = -EPERM; - mutex_lock(&real_tty->termios_mutex); - if (user_termios_to_kernel_termios_1(real_tty->termios_locked, + return -EPERM; + copy_termios_locked(real_tty, &kterm); + if (user_termios_to_kernel_termios_1(&kterm, (struct termios __user *) arg)) - ret = -EFAULT; + return -EFAULT; + mutex_lock(&real_tty->termios_mutex); + memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios)); mutex_unlock(&real_tty->termios_mutex); return ret; #endif @@ -1023,9 +1053,10 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, if (real_tty->termiox == NULL) return -EINVAL; mutex_lock(&real_tty->termios_mutex); - if (copy_to_user(p, real_tty->termiox, sizeof(struct termiox))) - ret = -EFAULT; + memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox)); mutex_unlock(&real_tty->termios_mutex); + if (copy_to_user(p, &ktermx, sizeof(struct termiox))) + ret = -EFAULT; return ret; case TCSETX: return set_termiox(real_tty, p, 0); @@ -1035,10 +1066,9 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, return set_termiox(real_tty, p, TERMIOS_FLUSH); #endif case TIOCGSOFTCAR: - mutex_lock(&real_tty->termios_mutex); - ret = put_user(C_CLOCAL(real_tty) ? 1 : 0, + copy_termios(real_tty, &kterm); + ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0, (int __user *)arg); - mutex_unlock(&real_tty->termios_mutex); return ret; case TIOCSSOFTCAR: if (get_user(arg, (unsigned int __user *) arg)) diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index f78f5b0127a8..39c8f86dedd4 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -115,19 +115,22 @@ EXPORT_SYMBOL(tty_unregister_ldisc); /** * tty_ldisc_try_get - try and reference an ldisc * @disc: ldisc number - * @ld: tty ldisc structure to complete * * Attempt to open and lock a line discipline into place. Return - * the line discipline refcounted and assigned in ld. On an error - * report the error code back + * the line discipline refcounted or an error. */ -static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) +static struct tty_ldisc *tty_ldisc_try_get(int disc) { unsigned long flags; + struct tty_ldisc *ld; struct tty_ldisc_ops *ldops; int err = -EINVAL; - + + ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL); + if (ld == NULL) + return ERR_PTR(-ENOMEM); + spin_lock_irqsave(&tty_ldisc_lock, flags); ld->ops = NULL; ldops = tty_ldiscs[disc]; @@ -140,17 +143,19 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) /* lock it */ ldops->refcount++; ld->ops = ldops; + ld->refcount = 0; err = 0; } } spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return err; + if (err) + return ERR_PTR(err); + return ld; } /** * tty_ldisc_get - take a reference to an ldisc * @disc: ldisc number - * @ld: tty line discipline structure to use * * Takes a reference to a line discipline. Deals with refcounts and * module locking counts. Returns NULL if the discipline is not available. @@ -161,52 +166,54 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) * takes tty_ldisc_lock to guard against ldisc races */ -static int tty_ldisc_get(int disc, struct tty_ldisc *ld) +static struct tty_ldisc *tty_ldisc_get(int disc) { - int err; + struct tty_ldisc *ld; if (disc < N_TTY || disc >= NR_LDISCS) - return -EINVAL; - err = tty_ldisc_try_get(disc, ld); - if (err < 0) { + return ERR_PTR(-EINVAL); + ld = tty_ldisc_try_get(disc); + if (IS_ERR(ld)) { request_module("tty-ldisc-%d", disc); - err = tty_ldisc_try_get(disc, ld); + ld = tty_ldisc_try_get(disc); } - return err; + return ld; } /** * tty_ldisc_put - drop ldisc reference - * @disc: ldisc number + * @ld: ldisc * * Drop a reference to a line discipline. Manage refcounts and - * module usage counts + * module usage counts. Free the ldisc once the recount hits zero. * * Locking: * takes tty_ldisc_lock to guard against ldisc races */ -static void tty_ldisc_put(struct tty_ldisc_ops *ld) +static void tty_ldisc_put(struct tty_ldisc *ld) { unsigned long flags; - int disc = ld->num; + int disc = ld->ops->num; + struct tty_ldisc_ops *ldo; BUG_ON(disc < N_TTY || disc >= NR_LDISCS); spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = tty_ldiscs[disc]; - BUG_ON(ld->refcount == 0); - ld->refcount--; - module_put(ld->owner); + ldo = tty_ldiscs[disc]; + BUG_ON(ldo->refcount == 0); + ldo->refcount--; + module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); + kfree(ld); } -static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) +static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { return (*pos < NR_LDISCS) ? pos : NULL; } -static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) +static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return (*pos < NR_LDISCS) ? pos : NULL; @@ -219,12 +226,13 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) static int tty_ldiscs_seq_show(struct seq_file *m, void *v) { int i = *(loff_t *)v; - struct tty_ldisc ld; - - if (tty_ldisc_get(i, &ld) < 0) + struct tty_ldisc *ld; + + ld = tty_ldisc_try_get(i); + if (IS_ERR(ld)) return 0; - seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i); - tty_ldisc_put(ld.ops); + seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); + tty_ldisc_put(ld); return 0; } @@ -263,8 +271,7 @@ const struct file_operations tty_ldiscs_proc_fops = { static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) { - ld->refcount = 0; - tty->ldisc = *ld; + tty->ldisc = ld; } /** @@ -286,7 +293,7 @@ static int tty_ldisc_try(struct tty_struct *tty) int ret = 0; spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = &tty->ldisc; + ld = tty->ldisc; if (test_bit(TTY_LDISC, &tty->flags)) { ld->refcount++; ret = 1; @@ -315,10 +322,9 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { /* wait_event is a macro */ wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - WARN_ON(tty->ldisc.refcount == 0); - return &tty->ldisc; + WARN_ON(tty->ldisc->refcount == 0); + return tty->ldisc; } - EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); /** @@ -335,10 +341,9 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) { if (tty_ldisc_try(tty)) - return &tty->ldisc; + return tty->ldisc; return NULL; } - EXPORT_SYMBOL_GPL(tty_ldisc_ref); /** @@ -366,7 +371,6 @@ void tty_ldisc_deref(struct tty_ldisc *ld) wake_up(&tty_ldisc_wait); spin_unlock_irqrestore(&tty_ldisc_lock, flags); } - EXPORT_SYMBOL_GPL(tty_ldisc_deref); /** @@ -389,6 +393,26 @@ void tty_ldisc_enable(struct tty_struct *tty) } /** + * tty_ldisc_flush - flush line discipline queue + * @tty: tty + * + * Flush the line discipline queue (if any) for this tty. If there + * is no line discipline active this is a no-op. + */ + +void tty_ldisc_flush(struct tty_struct *tty) +{ + struct tty_ldisc *ld = tty_ldisc_ref(tty); + if (ld) { + if (ld->ops->flush_buffer) + ld->ops->flush_buffer(tty); + tty_ldisc_deref(ld); + } + tty_buffer_flush(tty); +} +EXPORT_SYMBOL_GPL(tty_ldisc_flush); + +/** * tty_set_termios_ldisc - set ldisc field * @tty: tty structure * @num: line discipline number @@ -407,6 +431,39 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) mutex_unlock(&tty->termios_mutex); } +/** + * tty_ldisc_open - open a line discipline + * @tty: tty we are opening the ldisc on + * @ld: discipline to open + * + * A helper opening method. Also a convenient debugging and check + * point. + */ + +static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld) +{ + WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags)); + if (ld->ops->open) + return ld->ops->open(tty); + return 0; +} + +/** + * tty_ldisc_close - close a line discipline + * @tty: tty we are opening the ldisc on + * @ld: discipline to close + * + * A helper close method. Also a convenient debugging and check + * point. + */ + +static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) +{ + WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); + clear_bit(TTY_LDISC_OPEN, &tty->flags); + if (ld->ops->close) + ld->ops->close(tty); +} /** * tty_ldisc_restore - helper for tty ldisc change @@ -420,66 +477,136 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) { char buf[64]; - struct tty_ldisc new_ldisc; + struct tty_ldisc *new_ldisc; + int r; /* There is an outstanding reference here so this is safe */ - tty_ldisc_get(old->ops->num, old); + old = tty_ldisc_get(old->ops->num); + WARN_ON(IS_ERR(old)); tty_ldisc_assign(tty, old); tty_set_termios_ldisc(tty, old->ops->num); - if (old->ops->open && (old->ops->open(tty) < 0)) { - tty_ldisc_put(old->ops); + if (tty_ldisc_open(tty, old) < 0) { + tty_ldisc_put(old); /* This driver is always present */ - if (tty_ldisc_get(N_TTY, &new_ldisc) < 0) + new_ldisc = tty_ldisc_get(N_TTY); + if (IS_ERR(new_ldisc)) panic("n_tty: get"); - tty_ldisc_assign(tty, &new_ldisc); + tty_ldisc_assign(tty, new_ldisc); tty_set_termios_ldisc(tty, N_TTY); - if (new_ldisc.ops->open) { - int r = new_ldisc.ops->open(tty); - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty, buf), r); - } + r = tty_ldisc_open(tty, new_ldisc); + if (r < 0) + panic("Couldn't open N_TTY ldisc for " + "%s --- error %d.", + tty_name(tty, buf), r); } } /** + * tty_ldisc_halt - shut down the line discipline + * @tty: tty device + * + * Shut down the line discipline and work queue for this tty device. + * The TTY_LDISC flag being cleared ensures no further references can + * be obtained while the delayed work queue halt ensures that no more + * data is fed to the ldisc. + * + * In order to wait for any existing references to complete see + * tty_ldisc_wait_idle. + */ + +static int tty_ldisc_halt(struct tty_struct *tty) +{ + clear_bit(TTY_LDISC, &tty->flags); + return cancel_delayed_work(&tty->buf.work); +} + +/** + * tty_ldisc_wait_idle - wait for the ldisc to become idle + * @tty: tty to wait for + * + * Wait for the line discipline to become idle. The discipline must + * have been halted for this to guarantee it remains idle. + * + * tty_ldisc_lock protects the ref counts currently. + */ + +static int tty_ldisc_wait_idle(struct tty_struct *tty) +{ + unsigned long flags; + spin_lock_irqsave(&tty_ldisc_lock, flags); + while (tty->ldisc->refcount) { + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + if (wait_event_timeout(tty_ldisc_wait, + tty->ldisc->refcount == 0, 5 * HZ) == 0) + return -EBUSY; + spin_lock_irqsave(&tty_ldisc_lock, flags); + } + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + return 0; +} + +/** * tty_set_ldisc - set line discipline * @tty: the terminal to set * @ldisc: the line discipline * * Set the discipline of a tty line. Must be called from a process - * context. + * context. The ldisc change logic has to protect itself against any + * overlapping ldisc change (including on the other end of pty pairs), + * the close of one side of a tty/pty pair, and eventually hangup. * - * Locking: takes tty_ldisc_lock. - * called functions take termios_mutex + * Locking: takes tty_ldisc_lock, termios_mutex */ int tty_set_ldisc(struct tty_struct *tty, int ldisc) { int retval; - struct tty_ldisc o_ldisc, new_ldisc; - int work; - unsigned long flags; + struct tty_ldisc *o_ldisc, *new_ldisc; + int work, o_work = 0; struct tty_struct *o_tty; -restart: - /* This is a bit ugly for now but means we can break the 'ldisc - is part of the tty struct' assumption later */ - retval = tty_ldisc_get(ldisc, &new_ldisc); - if (retval) - return retval; + new_ldisc = tty_ldisc_get(ldisc); + if (IS_ERR(new_ldisc)) + return PTR_ERR(new_ldisc); + + /* + * We need to look at the tty locking here for pty/tty pairs + * when both sides try to change in parallel. + */ + + o_tty = tty->link; /* o_tty is the pty side or NULL */ + + + /* + * Check the no-op case + */ + + if (tty->ldisc->ops->num == ldisc) { + tty_ldisc_put(new_ldisc); + return 0; + } /* * Problem: What do we do if this blocks ? + * We could deadlock here */ tty_wait_until_sent(tty, 0); - if (tty->ldisc.ops->num == ldisc) { - tty_ldisc_put(new_ldisc.ops); - return 0; + mutex_lock(&tty->ldisc_mutex); + + /* + * We could be midstream of another ldisc change which has + * dropped the lock during processing. If so we need to wait. + */ + + while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) { + mutex_unlock(&tty->ldisc_mutex); + wait_event(tty_ldisc_wait, + test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0); + mutex_lock(&tty->ldisc_mutex); } + set_bit(TTY_LDISC_CHANGING, &tty->flags); /* * No more input please, we are switching. The new ldisc @@ -489,8 +616,6 @@ restart: tty->receive_room = 0; o_ldisc = tty->ldisc; - o_tty = tty->link; - /* * Make sure we don't change while someone holds a * reference to the line discipline. The TTY_LDISC bit @@ -501,108 +626,181 @@ restart: * with a userspace app continually trying to use the tty in * parallel to the change and re-referencing the tty. */ - clear_bit(TTY_LDISC, &tty->flags); - if (o_tty) - clear_bit(TTY_LDISC, &o_tty->flags); - spin_lock_irqsave(&tty_ldisc_lock, flags); - if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) { - if (tty->ldisc.refcount) { - /* Free the new ldisc we grabbed. Must drop the lock - first. */ - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(o_ldisc.ops); - /* - * There are several reasons we may be busy, including - * random momentary I/O traffic. We must therefore - * retry. We could distinguish between blocking ops - * and retries if we made tty_ldisc_wait() smarter. - * That is up for discussion. - */ - if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - if (o_tty && o_tty->ldisc.refcount) { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(o_tty->ldisc.ops); - if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - } - /* - * If the TTY_LDISC bit is set, then we are racing against - * another ldisc change - */ - if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) { - struct tty_ldisc *ld; - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(new_ldisc.ops); - ld = tty_ldisc_ref_wait(tty); - tty_ldisc_deref(ld); - goto restart; - } - /* - * This flag is used to avoid two parallel ldisc changes. Once - * open and close are fine grained locked this may work better - * as a mutex shared with the open/close/hup paths - */ - set_bit(TTY_LDISC_CHANGING, &tty->flags); + work = tty_ldisc_halt(tty); if (o_tty) - set_bit(TTY_LDISC_CHANGING, &o_tty->flags); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - /* - * From this point on we know nobody has an ldisc - * usage reference, nor can they obtain one until - * we say so later on. - */ + o_work = tty_ldisc_halt(o_tty); - work = cancel_delayed_work(&tty->buf.work); /* - * Wait for ->hangup_work and ->buf.work handlers to terminate - * MUST NOT hold locks here. + * Wait for ->hangup_work and ->buf.work handlers to terminate. + * We must drop the mutex here in case a hangup is also in process. */ + + mutex_unlock(&tty->ldisc_mutex); + flush_scheduled_work(); + + /* Let any existing reference holders finish */ + retval = tty_ldisc_wait_idle(tty); + if (retval < 0) { + clear_bit(TTY_LDISC_CHANGING, &tty->flags); + tty_ldisc_put(new_ldisc); + return retval; + } + + mutex_lock(&tty->ldisc_mutex); + if (test_bit(TTY_HUPPED, &tty->flags)) { + /* We were raced by the hangup method. It will have stomped + the ldisc data and closed the ldisc down */ + clear_bit(TTY_LDISC_CHANGING, &tty->flags); + mutex_unlock(&tty->ldisc_mutex); + tty_ldisc_put(new_ldisc); + return -EIO; + } + /* Shutdown the current discipline. */ - if (o_ldisc.ops->close) - (o_ldisc.ops->close)(tty); + tty_ldisc_close(tty, o_ldisc); /* Now set up the new line discipline. */ - tty_ldisc_assign(tty, &new_ldisc); + tty_ldisc_assign(tty, new_ldisc); tty_set_termios_ldisc(tty, ldisc); - if (new_ldisc.ops->open) - retval = (new_ldisc.ops->open)(tty); + + retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { - tty_ldisc_put(new_ldisc.ops); - tty_ldisc_restore(tty, &o_ldisc); + /* Back to the old one or N_TTY if we can't */ + tty_ldisc_put(new_ldisc); + tty_ldisc_restore(tty, o_ldisc); } + /* At this point we hold a reference to the new ldisc and a a reference to the old ldisc. If we ended up flipping back to the existing ldisc we have two references to it */ - if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc) + if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc) tty->ops->set_ldisc(tty); - tty_ldisc_put(o_ldisc.ops); + tty_ldisc_put(o_ldisc); /* - * Allow ldisc referencing to occur as soon as the driver - * ldisc callback completes. + * Allow ldisc referencing to occur again */ tty_ldisc_enable(tty); if (o_tty) tty_ldisc_enable(o_tty); - /* Restart it in case no characters kick it off. Safe if + /* Restart the work queue in case no characters kick it off. Safe if already running */ if (work) schedule_delayed_work(&tty->buf.work, 1); + if (o_work) + schedule_delayed_work(&o_tty->buf.work, 1); + mutex_unlock(&tty->ldisc_mutex); return retval; } +/** + * tty_reset_termios - reset terminal state + * @tty: tty to reset + * + * Restore a terminal to the driver default state. + */ + +static void tty_reset_termios(struct tty_struct *tty) +{ + mutex_lock(&tty->termios_mutex); + *tty->termios = tty->driver->init_termios; + tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios); + tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios); + mutex_unlock(&tty->termios_mutex); +} + + +/** + * tty_ldisc_reinit - reinitialise the tty ldisc + * @tty: tty to reinit + * + * Switch the tty back to N_TTY line discipline and leave the + * ldisc state closed + */ + +static void tty_ldisc_reinit(struct tty_struct *tty) +{ + struct tty_ldisc *ld; + + tty_ldisc_close(tty, tty->ldisc); + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; + /* + * Switch the line discipline back + */ + ld = tty_ldisc_get(N_TTY); + BUG_ON(IS_ERR(ld)); + tty_ldisc_assign(tty, ld); + tty_set_termios_ldisc(tty, N_TTY); +} + +/** + * tty_ldisc_hangup - hangup ldisc reset + * @tty: tty being hung up + * + * Some tty devices reset their termios when they receive a hangup + * event. In that situation we must also switch back to N_TTY properly + * before we reset the termios data. + * + * Locking: We can take the ldisc mutex as the rest of the code is + * careful to allow for this. + * + * In the pty pair case this occurs in the close() path of the + * tty itself so we must be careful about locking rules. + */ + +void tty_ldisc_hangup(struct tty_struct *tty) +{ + struct tty_ldisc *ld; + + /* + * FIXME! What are the locking issues here? This may me overdoing + * things... This question is especially important now that we've + * removed the irqlock. + */ + ld = tty_ldisc_ref(tty); + if (ld != NULL) { + /* We may have no line discipline at this point */ + if (ld->ops->flush_buffer) + ld->ops->flush_buffer(tty); + tty_driver_flush_buffer(tty); + if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && + ld->ops->write_wakeup) + ld->ops->write_wakeup(tty); + if (ld->ops->hangup) + ld->ops->hangup(tty); + tty_ldisc_deref(ld); + } + /* + * FIXME: Once we trust the LDISC code better we can wait here for + * ldisc completion and fix the driver call race + */ + wake_up_interruptible_poll(&tty->write_wait, POLLOUT); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); + /* + * Shutdown the current line discipline, and reset it to + * N_TTY. + */ + if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { + /* Avoid racing set_ldisc */ + mutex_lock(&tty->ldisc_mutex); + /* Switch back to N_TTY */ + tty_ldisc_reinit(tty); + /* At this point we have a closed ldisc and we want to + reopen it. We could defer this to the next open but + it means auditing a lot of other paths so this is a FIXME */ + WARN_ON(tty_ldisc_open(tty, tty->ldisc)); + tty_ldisc_enable(tty); + mutex_unlock(&tty->ldisc_mutex); + tty_reset_termios(tty); + } +} /** * tty_ldisc_setup - open line discipline @@ -610,24 +808,23 @@ restart: * @o_tty: pair tty for pty/tty pairs * * Called during the initial open of a tty/pty pair in order to set up the - * line discplines and bind them to the tty. + * line disciplines and bind them to the tty. This has no locking issues + * as the device isn't yet active. */ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) { - struct tty_ldisc *ld = &tty->ldisc; + struct tty_ldisc *ld = tty->ldisc; int retval; - if (ld->ops->open) { - retval = (ld->ops->open)(tty); - if (retval) - return retval; - } - if (o_tty && o_tty->ldisc.ops->open) { - retval = (o_tty->ldisc.ops->open)(o_tty); + retval = tty_ldisc_open(tty, ld); + if (retval) + return retval; + + if (o_tty) { + retval = tty_ldisc_open(o_tty, o_tty->ldisc); if (retval) { - if (ld->ops->close) - (ld->ops->close)(tty); + tty_ldisc_close(tty, ld); return retval; } tty_ldisc_enable(o_tty); @@ -635,32 +832,25 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) tty_ldisc_enable(tty); return 0; } - /** * tty_ldisc_release - release line discipline * @tty: tty being shut down * @o_tty: pair tty for pty/tty pairs * - * Called during the final close of a tty/pty pair in order to shut down the - * line discpline layer. + * Called during the final close of a tty/pty pair in order to shut down + * the line discpline layer. On exit the ldisc assigned is N_TTY and the + * ldisc has not been opened. */ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) { - unsigned long flags; - struct tty_ldisc ld; /* * Prevent flush_to_ldisc() from rescheduling the work for later. Then * kill any delayed work. As this is the final close it does not * race with the set_ldisc code path. */ - clear_bit(TTY_LDISC, &tty->flags); - cancel_delayed_work(&tty->buf.work); - - /* - * Wait for ->hangup_work and ->buf.work handlers to terminate - */ + tty_ldisc_halt(tty); flush_scheduled_work(); /* @@ -668,38 +858,19 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) * side waiters as the file is closing so user count on the file * side is zero. */ - spin_lock_irqsave(&tty_ldisc_lock, flags); - while (tty->ldisc.refcount) { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); - spin_lock_irqsave(&tty_ldisc_lock, flags); - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); + + tty_ldisc_wait_idle(tty); + /* * Shutdown the current line discipline, and reset it to N_TTY. * * FIXME: this MUST get fixed for the new reflocking */ - if (tty->ldisc.ops->close) - (tty->ldisc.ops->close)(tty); - tty_ldisc_put(tty->ldisc.ops); - /* - * Switch the line discipline back - */ - WARN_ON(tty_ldisc_get(N_TTY, &ld)); - tty_ldisc_assign(tty, &ld); - tty_set_termios_ldisc(tty, N_TTY); - if (o_tty) { - /* FIXME: could o_tty be in setldisc here ? */ - clear_bit(TTY_LDISC, &o_tty->flags); - if (o_tty->ldisc.ops->close) - (o_tty->ldisc.ops->close)(o_tty); - tty_ldisc_put(o_tty->ldisc.ops); - WARN_ON(tty_ldisc_get(N_TTY, &ld)); - tty_ldisc_assign(o_tty, &ld); - tty_set_termios_ldisc(o_tty, N_TTY); - } + tty_ldisc_reinit(tty); + /* This will need doing differently if we need to lock */ + if (o_tty) + tty_ldisc_release(o_tty, NULL); } /** @@ -712,10 +883,10 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) void tty_ldisc_init(struct tty_struct *tty) { - struct tty_ldisc ld; - if (tty_ldisc_get(N_TTY, &ld) < 0) + struct tty_ldisc *ld = tty_ldisc_get(N_TTY); + if (IS_ERR(ld)) panic("n_tty: init_tty"); - tty_ldisc_assign(tty, &ld); + tty_ldisc_assign(tty, ld); } void tty_ldisc_begin(void) diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index 9b8004c72686..62dadfc95e34 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -137,7 +137,7 @@ int tty_port_carrier_raised(struct tty_port *port) EXPORT_SYMBOL(tty_port_carrier_raised); /** - * tty_port_raise_dtr_rts - Riase DTR/RTS + * tty_port_raise_dtr_rts - Raise DTR/RTS * @port: tty port * * Wrapper for the DTR/RTS raise logic. For the moment this is used @@ -147,12 +147,28 @@ EXPORT_SYMBOL(tty_port_carrier_raised); void tty_port_raise_dtr_rts(struct tty_port *port) { - if (port->ops->raise_dtr_rts) - port->ops->raise_dtr_rts(port); + if (port->ops->dtr_rts) + port->ops->dtr_rts(port, 1); } EXPORT_SYMBOL(tty_port_raise_dtr_rts); /** + * tty_port_lower_dtr_rts - Lower DTR/RTS + * @port: tty port + * + * Wrapper for the DTR/RTS raise logic. For the moment this is used + * to hide some internal details. This will eventually become entirely + * internal to the tty port. + */ + +void tty_port_lower_dtr_rts(struct tty_port *port) +{ + if (port->ops->dtr_rts) + port->ops->dtr_rts(port, 0); +} +EXPORT_SYMBOL(tty_port_lower_dtr_rts); + +/** * tty_port_block_til_ready - Waiting logic for tty open * @port: the tty port being opened * @tty: the tty device being bound @@ -167,7 +183,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts); * - port flags and counts * * The passed tty_port must implement the carrier_raised method if it can - * do carrier detect and the raise_dtr_rts method if it supports software + * do carrier detect and the dtr_rts method if it supports software * management of these lines. Note that the dtr/rts raise is done each * iteration as a hangup may have previously dropped them while we wait. */ @@ -182,7 +198,8 @@ int tty_port_block_til_ready(struct tty_port *port, /* block if port is in the process of being closed */ if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) { - interruptible_sleep_on(&port->close_wait); + wait_event_interruptible(port->close_wait, + !(port->flags & ASYNC_CLOSING)); if (port->flags & ASYNC_HUP_NOTIFY) return -EAGAIN; else @@ -205,7 +222,6 @@ int tty_port_block_til_ready(struct tty_port *port, before the next open may complete */ retval = 0; - add_wait_queue(&port->open_wait, &wait); /* The port lock protects the port counts */ spin_lock_irqsave(&port->lock, flags); @@ -219,7 +235,7 @@ int tty_port_block_til_ready(struct tty_port *port, if (tty->termios->c_cflag & CBAUD) tty_port_raise_dtr_rts(port); - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE); /* Check for a hangup or uninitialised port. Return accordingly */ if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) { if (port->flags & ASYNC_HUP_NOTIFY) @@ -240,8 +256,7 @@ int tty_port_block_til_ready(struct tty_port *port, } schedule(); } - set_current_state(TASK_RUNNING); - remove_wait_queue(&port->open_wait, &wait); + finish_wait(&port->open_wait, &wait); /* Update counts. A parallel hangup will have set count to zero and we must not mess that up further */ @@ -292,6 +307,17 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f if (port->flags & ASYNC_INITIALIZED && port->closing_wait != ASYNC_CLOSING_WAIT_NONE) tty_wait_until_sent(tty, port->closing_wait); + if (port->drain_delay) { + unsigned int bps = tty_get_baud_rate(tty); + long timeout; + + if (bps > 1200) + timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps, + HZ / 10); + else + timeout = 2 * HZ; + schedule_timeout_interruptible(timeout); + } return 1; } EXPORT_SYMBOL(tty_port_close_start); @@ -302,6 +328,9 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty) tty_ldisc_flush(tty); + if (tty->termios->c_cflag & HUPCL) + tty_port_lower_dtr_rts(port); + spin_lock_irqsave(&port->lock, flags); tty->closing = 0; diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 537da1cde16d..e59b6dee9ae2 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -402,27 +402,23 @@ static u8 ali_cable_detect(ide_hwif_t *hwif) return cbl; } -#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) +#ifndef CONFIG_SPARC64 /** * init_hwif_ali15x3 - Initialize the ALI IDE x86 stuff * @hwif: interface to configure * * Obtain the IRQ tables for an ALi based IDE solution on the PC * class platforms. This part of the code isn't applicable to the - * Sparc and PowerPC systems. + * Sparc systems. */ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif) { - struct pci_dev *dev = to_pci_dev(hwif->dev); u8 ideic, inmir; s8 irq_routing_table[] = { -1, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; int irq = -1; - if (dev->device == PCI_DEVICE_ID_AL_M5229) - hwif->irq = hwif->channel ? 15 : 14; - if (isa_dev) { /* * read IDE interface control @@ -455,7 +451,7 @@ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif) } #else #define init_hwif_ali15x3 NULL -#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */ +#endif /* CONFIG_SPARC64 */ /** * init_dma_ali15x3 - set up DMA on ALi15x3 diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 7201b176d75b..afe5a4323879 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -80,34 +80,6 @@ void ide_init_pc(struct ide_atapi_pc *pc) EXPORT_SYMBOL_GPL(ide_init_pc); /* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. - */ -static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, struct request *rq) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->buffer = (char *)pc; - rq->rq_disk = disk; - - if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; - } - - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - -/* * Add a special packet command request to the tail of the request queue, * and wait for it to be serviced. */ @@ -119,19 +91,21 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; - rq->buffer = (char *)pc; + rq->special = (char *)pc; if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; + error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer, + GFP_NOIO); + if (error) + goto put_req; } memcpy(rq->cmd, pc->c, 12); if (drive->media == ide_tape) rq->cmd[13] = REQ_IDETAPE_PC1; error = blk_execute_rq(drive->queue, disk, rq, 0); +put_req: blk_put_request(rq); - return error; } EXPORT_SYMBOL_GPL(ide_queue_pc_tail); @@ -191,20 +165,103 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); +void ide_prep_sense(ide_drive_t *drive, struct request *rq) +{ + struct request_sense *sense = &drive->sense_data; + struct request *sense_rq = &drive->sense_rq; + unsigned int cmd_len, sense_len; + int err; + + debug_log("%s: enter\n", __func__); + + switch (drive->media) { + case ide_floppy: + cmd_len = 255; + sense_len = 18; + break; + case ide_tape: + cmd_len = 20; + sense_len = 20; + break; + default: + cmd_len = 18; + sense_len = 18; + } + + BUG_ON(sense_len > sizeof(*sense)); + + if (blk_sense_request(rq) || drive->sense_rq_armed) + return; + + memset(sense, 0, sizeof(*sense)); + + blk_rq_init(rq->q, sense_rq); + + err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, + GFP_NOIO); + if (unlikely(err)) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: failed to map sense buffer\n", + drive->name); + return; + } + + sense_rq->rq_disk = rq->rq_disk; + sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; + sense_rq->cmd[4] = cmd_len; + sense_rq->cmd_type = REQ_TYPE_SENSE; + sense_rq->cmd_flags |= REQ_PREEMPT; + + if (drive->media == ide_tape) + sense_rq->cmd[13] = REQ_IDETAPE_PC1; + + drive->sense_rq_armed = true; +} +EXPORT_SYMBOL_GPL(ide_prep_sense); + +int ide_queue_sense_rq(ide_drive_t *drive, void *special) +{ + /* deferred failure from ide_prep_sense() */ + if (!drive->sense_rq_armed) { + printk(KERN_WARNING "%s: failed queue sense request\n", + drive->name); + return -ENOMEM; + } + + drive->sense_rq.special = special; + drive->sense_rq_armed = false; + + drive->hwif->rq = NULL; + + elv_add_request(drive->queue, &drive->sense_rq, + ELEVATOR_INSERT_FRONT, 0); + return 0; +} +EXPORT_SYMBOL_GPL(ide_queue_sense_rq); + /* * Called when an error was detected during the last packet command. - * We queue a request sense packet command in the head of the request list. + * We queue a request sense packet command at the head of the request + * queue. */ -void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) +void ide_retry_pc(ide_drive_t *drive) { - struct request *rq = &drive->request_sense_rq; + struct request *sense_rq = &drive->sense_rq; struct ide_atapi_pc *pc = &drive->request_sense_pc; (void)ide_read_error(drive); - ide_create_request_sense_cmd(drive, pc); + + /* init pc from sense_rq */ + ide_init_pc(pc); + memcpy(pc->c, sense_rq->cmd, 12); + pc->buf = bio_data(sense_rq->bio); /* pointer to mapped address */ + pc->req_xfer = sense_rq->data_len; + if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_pc_head(drive, disk, pc, rq); + + if (ide_queue_sense_rq(drive, pc)) + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -276,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xferfunc; unsigned int timeout, done; u16 bcount; u8 stat, ireason, dsc = 0; @@ -303,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->name, rq_data_dir(pc->rq) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; - } else { + } else pc->xferred = pc->req_xfer; - if (drive->pc_update_buffers) - drive->pc_update_buffers(drive, pc); - } debug_log("%s: DMA finished\n", drive->name); } @@ -343,7 +396,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("[cmd %x]: check condition\n", rq->cmd[0]); /* Retry operation */ - ide_retry_pc(drive, rq->rq_disk); + ide_retry_pc(drive); /* queued, but not started */ return ide_stopped; @@ -353,6 +406,12 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0) dsc = 1; + /* + * ->pc_callback() might change rq->data_len for + * residual count, cache total length. + */ + done = blk_rq_bytes(rq); + /* Command finished - Call the callback function */ uptodate = drive->pc_callback(drive, dsc); @@ -361,7 +420,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (blk_special_request(rq)) { rq->errors = 0; - done = blk_rq_bytes(rq); error = 0; } else { @@ -370,11 +428,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape) - done = ide_rq_bytes(rq); /* FIXME */ - else - done = blk_rq_bytes(rq); - error = uptodate ? 0 : -EIO; } @@ -407,21 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - xferfunc = write ? tp_ops->output_data : tp_ops->input_data; - - if (drive->media == ide_floppy && pc->buf == NULL) { - done = min_t(unsigned int, bcount, cmd->nleft); - ide_pio_bytes(drive, cmd, write, done); - } else if (drive->media == ide_tape && pc->bh) { - done = drive->pc_io_buffers(drive, pc, bcount, write); - } else { - done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); - xferfunc(drive, NULL, pc->cur_pos, done); - } + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); - /* Update the current position */ + /* Update transferred byte count */ pc->xferred += done; - pc->cur_pos += done; bcount -= done; @@ -599,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) /* We haven't transferred any data yet */ pc->xferred = 0; - pc->cur_pos = pc->buf; valid_tf = IDE_VALID_DEVICE; bcount = ((drive->media == ide_tape) ? diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 925eb9e245d1..a75e4ee1cd17 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -206,54 +206,25 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, ide_cd_log_error(drive->name, failed_command, sense); } -static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, - struct request *failed_command) -{ - struct cdrom_info *info = drive->driver_data; - struct request *rq = &drive->request_sense_rq; - - ide_debug_log(IDE_DBG_SENSE, "enter"); - - if (sense == NULL) - sense = &info->sense_data; - - /* stuff the sense request in front of our current request */ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_ATA_PC; - rq->rq_disk = info->disk; - - rq->data = sense; - rq->cmd[0] = GPCMD_REQUEST_SENSE; - rq->cmd[4] = 18; - rq->data_len = 18; - - rq->cmd_type = REQ_TYPE_SENSE; - rq->cmd_flags |= REQ_PREEMPT; - - /* NOTE! Save the failed command in "rq->buffer" */ - rq->buffer = (void *) failed_command; - - if (failed_command) - ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x", - failed_command->cmd[0]); - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For REQ_TYPE_SENSE, "rq->buffer" points to the original - * failed request + * For REQ_TYPE_SENSE, "rq->special" points to the original + * failed request. Also, the sense data should be read + * directly from rq which might be different from the original + * sense buffer if it got copied during mapping. */ - struct request *failed = (struct request *)rq->buffer; - struct cdrom_info *info = drive->driver_data; - void *sense = &info->sense_data; + struct request *failed = (struct request *)rq->special; + void *sense = bio_data(rq->bio); if (failed) { if (failed->sense) { + /* + * Sense is always read into drive->sense_data. + * Copy back if the failed request has its + * sense pointer set. + */ + memcpy(failed->sense, sense, 18); sense = failed->sense; failed->sense_len = rq->sense_len; } @@ -428,7 +399,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - cdrom_queue_request_sense(drive, NULL, NULL); + return ide_queue_sense_rq(drive, NULL) ? 2 : 1; return 1; end_request: @@ -442,8 +413,7 @@ end_request: hwif->rq = NULL; - cdrom_queue_request_sense(drive, rq->sense, rq); - return 1; + return ide_queue_sense_rq(drive, rq) ? 2 : 1; } else return 2; } @@ -503,14 +473,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) * and some drives don't send them. Sigh. */ if (rq->cmd[0] == GPCMD_REQUEST_SENSE && - cmd->nleft > 0 && cmd->nleft <= 5) { - unsigned int ofs = cmd->nbytes - cmd->nleft; - - while (cmd->nleft > 0) { - *((u8 *)rq->data + ofs++) = 0; - cmd->nleft--; - } - } + cmd->nleft > 0 && cmd->nleft <= 5) + cmd->nleft = 0; } int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, @@ -543,8 +507,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, rq->cmd_flags |= cmd_flags; rq->timeout = timeout; if (buffer) { - rq->data = buffer; - rq->data_len = *bufflen; + error = blk_rq_map_kern(drive->queue, rq, buffer, + *bufflen, GFP_NOIO); + if (error) { + blk_put_request(rq); + return error; + } } error = blk_execute_rq(drive->queue, info->disk, rq, 0); @@ -838,15 +806,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) drive->dma = 0; /* sg request */ - if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + if (rq->bio) { struct request_queue *q = drive->queue; + char *buf = bio_data(rq->bio); unsigned int alignment; - char *buf; - - if (rq->bio) - buf = bio_data(rq->bio); - else - buf = rq->data; drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); @@ -896,6 +859,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, goto out_end; } + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index 1d97101099ce..93a3cf1b0f3f 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -87,10 +87,6 @@ struct cdrom_info { struct atapi_toc *toc; - /* The result of the last successful request sense command - on this device. */ - struct request_sense sense_data; - u8 max_speed; /* Max speed of the drive. */ u8 current_speed; /* Current speed of the drive. */ diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index a9fbe2c31210..c2438804d3c4 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) cmd->protocol = ATA_PROT_NODATA; rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = cmd; } diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index a0b8cab1d9a6..d9123ecae4a9 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) /* * un-busy drive etc and make sure request is sane */ - rq = hwif->rq; - if (!rq) - goto out; - - hwif->rq = NULL; - - rq->errors = 0; - - if (!rq->bio) - goto out; - - rq->sector = rq->bio->bi_sector; - rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9; - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->buffer = bio_data(rq->bio); -out: + if (rq) { + hwif->rq = NULL; + rq->errors = 0; + } return ret; } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 2b4868d95f8b..537b7c558033 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -134,13 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive, drive->pc = pc; if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR)) ide_floppy_report_error(floppy, pc); + /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } @@ -216,15 +220,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, ide_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - if (rq->data_len && rq_data_dir(rq) == WRITE) - pc->flags |= PC_FLAG_WRITING; - pc->buf = rq->data; - if (rq->bio) + if (rq->data_len) { pc->flags |= PC_FLAG_DMA_OK; - /* - * possibly problematic, doesn't look like ide-floppy correctly - * handled scattered requests if dma fails... - */ + if (rq_data_dir(rq) == WRITE) + pc->flags |= PC_FLAG_WRITING; + } + /* pio will be performed by ide_pio_bytes() which handles sg fine */ + pc->buf = NULL; pc->req_xfer = pc->buf_size = rq->data_len; } @@ -265,8 +267,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); - } else if (blk_special_request(rq)) { - pc = (struct ide_atapi_pc *) rq->buffer; + } else if (blk_special_request(rq) || blk_sense_request(rq)) { + pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; idefloppy_blockpc_cmd(floppy, pc, rq); @@ -275,6 +277,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, goto out_end; } + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 6415a2e2ba87..41d804065d38 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -248,14 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) struct scatterlist *sg = hwif->sg_table; struct request *rq = cmd->rq; - if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { - sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE); - cmd->sg_nents = 1; - } else if (!rq->bio) { - sg_init_one(sg, rq->data, rq->data_len); - cmd->sg_nents = 1; - } else - cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); + cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); } EXPORT_SYMBOL_GPL(ide_map_sg); @@ -371,7 +364,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); @@ -484,6 +477,9 @@ void do_ide_request(struct request_queue *q) spin_unlock_irq(q->queue_lock); + /* HLD do_request() callback might sleep, make sure it's okay */ + might_sleep(); + if (ide_lock_host(host, hwif)) goto plug_device_2; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index c1c25ebbaa1f..5991b23793f2 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive) rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; - rq->cmd_flags |= REQ_SOFTBARRIER; if (blk_execute_rq(drive->queue, NULL, rq, 1)) ret = rq->errors; blk_put_request(rq); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 310d03f2b5b7..a914023d6d03 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) start_queue = 1; spin_unlock_irq(&hwif->lock); - if (start_queue) { - spin_lock_irq(q->queue_lock); - blk_start_queueing(q); - spin_unlock_irq(q->queue_lock); - } + if (start_queue) + blk_run_queue(q); return; } spin_unlock_irq(&hwif->lock); diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 0d8a151c0a01..ba1488bd8430 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int ret; /* call ACPI _GTM only once */ @@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_acpi_get_timing(hwif); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_SUSPEND; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int err; /* call ACPI _PS0 / _STM only once */ @@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev) ide_acpi_exec_tfs(drive); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_RESUME; rq->cmd_flags |= REQ_PREEMPT; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; @@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; - struct ide_cmd *cmd = rq->special; - - memset(cmd, 0, sizeof(*cmd)); + struct request_pm_state *pm = rq->special; + struct ide_cmd cmd = { }; switch (pm->pm_step) { case IDE_PM_FLUSH_CACHE: /* Suspend step 1 (flush cache) */ @@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; } if (ata_id_flush_ext_enabled(drive->id)) - cmd->tf.command = ATA_CMD_FLUSH_EXT; + cmd.tf.command = ATA_CMD_FLUSH_EXT; else - cmd->tf.command = ATA_CMD_FLUSH; + cmd.tf.command = ATA_CMD_FLUSH; goto out_do_tf; case IDE_PM_STANDBY: /* Suspend step 2 (standby) */ - cmd->tf.command = ATA_CMD_STANDBYNOW1; + cmd.tf.command = ATA_CMD_STANDBYNOW1; goto out_do_tf; case IDE_PM_RESTORE_PIO: /* Resume step 1 (restore PIO) */ ide_set_max_pio(drive); @@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) ide_complete_power_step(drive, rq); return ide_stopped; case IDE_PM_IDLE: /* Resume step 2 (idle) */ - cmd->tf.command = ATA_CMD_IDLEIMMEDIATE; + cmd.tf.command = ATA_CMD_IDLEIMMEDIATE; goto out_do_tf; case IDE_PM_RESTORE_DMA: /* Resume step 3 (restore DMA) */ /* @@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; out_do_tf: - cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; - cmd->valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; - cmd->protocol = ATA_PROT_NODATA; + cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; + cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; + cmd.protocol = ATA_PROT_NODATA; - return do_rw_taskfile(drive, cmd); + return do_rw_taskfile(drive, &cmd); } /** @@ -181,7 +173,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; unsigned long flags; ide_complete_power_step(drive, rq); @@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; if (blk_pm_suspend_request(rq) && pm->pm_step == IDE_PM_START_SUSPEND) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 3a53e0834cf7..203bbeac182f 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -131,13 +131,6 @@ enum { IDETAPE_DIR_WRITE = (1 << 2), }; -struct idetape_bh { - u32 b_size; - atomic_t b_count; - struct idetape_bh *b_reqnext; - char *b_data; -}; - /* Tape door status */ #define DOOR_UNLOCKED 0 #define DOOR_LOCKED 1 @@ -219,18 +212,12 @@ typedef struct ide_tape_obj { /* Data buffer size chosen based on the tape's recommendation */ int buffer_size; - /* merge buffer */ - struct idetape_bh *merge_bh; - /* size of the merge buffer */ - int merge_bh_size; - /* pointer to current buffer head within the merge buffer */ - struct idetape_bh *bh; - char *b_data; - int b_count; - - int pages_per_buffer; - /* Wasted space in each stage */ - int excess_bh_size; + /* Staging buffer of buffer_size bytes */ + void *buf; + /* The read/write cursor */ + void *cur; + /* The number of valid bytes in buf */ + size_t valid; /* Measures average tape speed */ unsigned long avg_time; @@ -297,84 +284,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) return tape; } -static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - while (bcount) { - if (bh == NULL) - break; - count = min( - (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), - bcount); - drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data + - atomic_read(&bh->b_count), count); - bcount -= count; - atomic_add(count, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } - } - - pc->bh = bh; - - return bcount; -} - -static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - while (bcount) { - if (bh == NULL) - break; - count = min((unsigned int)pc->b_count, (unsigned int)bcount); - drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count); - bcount -= count; - pc->b_data += count; - pc->b_count -= count; - if (!pc->b_count) { - bh = bh->b_reqnext; - pc->bh = bh; - if (bh) { - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); - } - } - } - - return bcount; -} - -static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - struct idetape_bh *bh = pc->bh; - int count; - unsigned int bcount = pc->xferred; - - if (pc->flags & PC_FLAG_WRITING) - return; - while (bcount) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return; - } - count = min((unsigned int)bh->b_size, (unsigned int)bcount); - atomic_set(&bh->b_count, count); - if (atomic_read(&bh->b_count) == bh->b_size) - bh = bh->b_reqnext; - bcount -= count; - } - pc->bh = bh; -} - /* * called on each failed packet command retry to analyze the request sense. We * currently do not utilize this information. @@ -392,12 +301,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) pc->c[0], tape->sense_key, tape->asc, tape->ascq); /* Correct pc->xferred by asking the tape. */ - if (pc->flags & PC_FLAG_DMA_ERROR) { + if (pc->flags & PC_FLAG_DMA_ERROR) pc->xferred = pc->req_xfer - tape->blk_size * get_unaligned_be32(&sense[3]); - idetape_update_buffers(drive, pc); - } /* * If error was the result of a zero-length read or write command, @@ -436,29 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) } } -/* Free data buffers completely. */ -static void ide_tape_kfree_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *prev_bh, *bh = tape->merge_bh; - - while (bh) { - u32 size = bh->b_size; - - while (size) { - unsigned int order = fls(size >> PAGE_SHIFT)-1; - - if (bh->b_data) - free_pages((unsigned long)bh->b_data, order); - - size &= (order-1); - bh->b_data += (1 << order) * PAGE_SIZE; - } - prev_bh = bh; - bh = bh->b_reqnext; - kfree(prev_bh); - } -} - static void ide_tape_handle_dsc(ide_drive_t *); static int ide_tape_callback(ide_drive_t *drive, int dsc) @@ -496,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) } tape->first_frame += blocks; - rq->current_nr_sectors -= blocks; + rq->data_len -= blocks * tape->blk_size; if (pc->error) { uptodate = 0; @@ -558,19 +442,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive) idetape_postpone_request(drive); } -static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - unsigned int bleft; - - if (write) - bleft = idetape_output_buffers(drive, pc, bcount); - else - bleft = idetape_input_buffers(drive, pc, bcount); - - return bcount - bleft; -} - /* * Packet Command Interface * @@ -622,6 +493,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, if (pc->retries > IDETAPE_MAX_PC_RETRIES || (pc->flags & PC_FLAG_ABORT)) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + /* * We will "abort" retrying a packet command in case legitimate * error code was received (crossing a filemark, or end of the @@ -641,8 +514,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; } + drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); @@ -695,7 +570,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) printk(KERN_ERR "ide-tape: %s: I/O error, ", tape->name); /* Retry operation */ - ide_retry_pc(drive, tape->disk); + ide_retry_pc(drive); return ide_stopped; } pc->error = 0; @@ -711,27 +586,22 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, struct ide_atapi_pc *pc, struct request *rq, u8 opcode) { - struct idetape_bh *bh = (struct idetape_bh *)rq->special; - unsigned int length = rq->current_nr_sectors; + unsigned int length = rq->nr_sectors; ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = bh; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; if (pc->req_xfer == tape->buffer_size) pc->flags |= PC_FLAG_DMA_OK; - if (opcode == READ_6) { + if (opcode == READ_6) pc->c[0] = READ_6; - atomic_set(&bh->b_count, 0); - } else if (opcode == WRITE_6) { + else if (opcode == WRITE_6) { pc->c[0] = WRITE_6; pc->flags |= PC_FLAG_WRITING; - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); } memcpy(rq->cmd, pc->c, 12); @@ -747,12 +617,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, struct ide_cmd cmd; u8 stat; - debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu," - " current_nr_sectors: %u\n", - (unsigned long long)rq->sector, rq->nr_sectors, - rq->current_nr_sectors); + debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n", + (unsigned long long)rq->sector, rq->nr_sectors); - if (!blk_special_request(rq)) { + if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " "request queue (%d)\n", drive->name, rq->cmd_type); @@ -828,7 +696,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, goto out; } if (rq->cmd[13] & REQ_IDETAPE_PC1) { - pc = (struct ide_atapi_pc *) rq->buffer; + pc = (struct ide_atapi_pc *)rq->special; rq->cmd[13] &= ~(REQ_IDETAPE_PC1); rq->cmd[13] |= REQ_IDETAPE_PC2; goto out; @@ -840,6 +708,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, BUG(); out: + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) @@ -847,167 +718,10 @@ out: cmd.rq = rq; - return ide_tape_issue_pc(drive, &cmd, pc); -} - -/* - * The function below uses __get_free_pages to allocate a data buffer of size - * tape->buffer_size (or a bit more). We attempt to combine sequential pages as - * much as possible. - * - * It returns a pointer to the newly allocated buffer, or NULL in case of - * failure. - */ -static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape, - int full, int clear) -{ - struct idetape_bh *prev_bh, *bh, *merge_bh; - int pages = tape->pages_per_buffer; - unsigned int order, b_allocd; - char *b_data = NULL; - - merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - bh = merge_bh; - if (bh == NULL) - goto abort; - - order = fls(pages) - 1; - bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!bh->b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - pages &= (order-1); - - if (clear) - memset(bh->b_data, 0, b_allocd); - bh->b_reqnext = NULL; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - - while (pages) { - order = fls(pages) - 1; - b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - - if (clear) - memset(b_data, 0, b_allocd); - - /* newly allocated page frames below buffer header or ...*/ - if (bh->b_data == b_data + b_allocd) { - bh->b_size += b_allocd; - bh->b_data -= b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - /* they are above the header */ - if (b_data == bh->b_data + bh->b_size) { - bh->b_size += b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - prev_bh = bh; - bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - if (!bh) { - free_pages((unsigned long) b_data, order); - goto abort; - } - bh->b_reqnext = NULL; - bh->b_data = b_data; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - prev_bh->b_reqnext = bh; - - pages &= (order-1); - } - - bh->b_size -= tape->excess_bh_size; - if (full) - atomic_sub(tape->excess_bh_size, &bh->b_count); - return merge_bh; -abort: - ide_tape_kfree_buffer(tape); - return NULL; -} + ide_init_sg_cmd(&cmd, pc->req_xfer); + ide_map_sg(drive, &cmd); -static int idetape_copy_stage_from_user(idetape_tape_t *tape, - const char __user *buf, int n) -{ - struct idetape_bh *bh = tape->bh; - int count; - int ret = 0; - - while (n) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - count = min((unsigned int) - (bh->b_size - atomic_read(&bh->b_count)), - (unsigned int)n); - if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf, - count)) - ret = 1; - n -= count; - atomic_add(count, &bh->b_count); - buf += count; - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } - } - tape->bh = bh; - return ret; -} - -static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf, - int n) -{ - struct idetape_bh *bh = tape->bh; - int count; - int ret = 0; - - while (n) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - count = min(tape->b_count, n); - if (copy_to_user(buf, tape->b_data, count)) - ret = 1; - n -= count; - tape->b_data += count; - tape->b_count -= count; - buf += count; - if (!tape->b_count) { - bh = bh->b_reqnext; - tape->bh = bh; - if (bh) { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } - } - } - return ret; -} - -static void idetape_init_merge_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *bh = tape->merge_bh; - tape->bh = tape->merge_bh; - - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) - atomic_set(&bh->b_count, 0); - else { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } + return ide_tape_issue_pc(drive, &cmd, pc); } /* @@ -1107,10 +821,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive) return; clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags); - tape->merge_bh_size = 0; - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + tape->valid = 0; + if (tape->buf != NULL) { + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; @@ -1164,36 +878,44 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive, * Generate a read/write request for the block device interface and wait for it * to be serviced. */ -static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, - struct idetape_bh *bh) +static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) { idetape_tape_t *tape = drive->driver_data; struct request *rq; - int ret, errors; + int ret; debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); + BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); + BUG_ON(size < 0 || size % tape->blk_size); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[13] = cmd; rq->rq_disk = tape->disk; - rq->special = (void *)bh; rq->sector = tape->first_frame; - rq->nr_sectors = blocks; - rq->current_nr_sectors = blocks; - blk_execute_rq(drive->queue, tape->disk, rq, 0); - errors = rq->errors; - ret = tape->blk_size * (blocks - rq->current_nr_sectors); - blk_put_request(rq); + if (size) { + ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, + __GFP_WAIT); + if (ret) + goto out_put; + } - if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0) - return 0; + blk_execute_rq(drive->queue, tape->disk, rq, 0); - if (tape->merge_bh) - idetape_init_merge_buffer(tape); - if (errors == IDE_DRV_ERROR_GENERAL) - return -EIO; + /* calculate the number of transferred bytes and update buffer state */ + size -= rq->data_len; + tape->cur = tape->buf; + if (cmd == REQ_IDETAPE_READ) + tape->valid = size; + else + tape->valid = 0; + + ret = size; + if (rq->errors == IDE_DRV_ERROR_GENERAL) + ret = -EIO; +out_put: + blk_put_request(rq); return ret; } @@ -1230,153 +952,87 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) pc->flags |= PC_FLAG_WAIT_FOR_DSC; } -/* Queue up a character device originated write request. */ -static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) -{ - idetape_tape_t *tape = drive->driver_data; - - debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, - blocks, tape->merge_bh); -} - static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - int blocks, min; - struct idetape_bh *bh; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer" " but we are not writing.\n"); return; } - if (tape->merge_bh_size > tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n"); - tape->merge_bh_size = tape->buffer_size; - } - if (tape->merge_bh_size) { - blocks = tape->merge_bh_size / tape->blk_size; - if (tape->merge_bh_size % tape->blk_size) { - unsigned int i; - - blocks++; - i = tape->blk_size - tape->merge_bh_size % - tape->blk_size; - bh = tape->bh->b_reqnext; - while (bh) { - atomic_set(&bh->b_count, 0); - bh = bh->b_reqnext; - } - bh = tape->bh; - while (i) { - if (bh == NULL) { - printk(KERN_INFO "ide-tape: bug," - " bh NULL\n"); - break; - } - min = min(i, (unsigned int)(bh->b_size - - atomic_read(&bh->b_count))); - memset(bh->b_data + atomic_read(&bh->b_count), - 0, min); - atomic_add(min, &bh->b_count); - i -= min; - bh = bh->b_reqnext; - } - } - (void) idetape_add_chrdev_write_request(drive, blocks); - tape->merge_bh_size = 0; - } - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + if (tape->buf) { + size_t aligned = roundup(tape->valid, tape->blk_size); + + memset(tape->cur, 0, aligned - tape->valid); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned); + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; } -static int idetape_init_read(ide_drive_t *drive) +static int idetape_init_rw(ide_drive_t *drive, int dir) { idetape_tape_t *tape = drive->driver_data; - int bytes_read; + int rc; - /* Initialize read operation */ - if (tape->chrdev_dir != IDETAPE_DIR_READ) { - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { - ide_tape_flush_merge_buffer(drive); - idetape_flush_tape_buffers(drive); - } - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size should be" - " 0 now\n"); - tape->merge_bh_size = 0; - } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); - if (!tape->merge_bh) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_READ; + BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE); - /* - * Issue a read 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. - * No point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - bytes_read = idetape_queue_rw_tail(drive, - REQ_IDETAPE_READ, 0, - tape->merge_bh); - if (bytes_read < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return bytes_read; - } - } - } + if (tape->chrdev_dir == dir) + return 0; - return 0; -} + if (tape->chrdev_dir == IDETAPE_DIR_READ) + ide_tape_discard_merge_buffer(drive, 1); + else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { + ide_tape_flush_merge_buffer(drive); + idetape_flush_tape_buffers(drive); + } -/* called from idetape_chrdev_read() to service a chrdev read request. */ -static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) -{ - idetape_tape_t *tape = drive->driver_data; + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; + } - debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks); + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) + return -ENOMEM; + tape->chrdev_dir = dir; + tape->cur = tape->buf; - /* If we are at a filemark, return a read length of 0 */ - if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) - return 0; - - idetape_init_read(drive); + /* + * Issue a 0 rw command to ensure that DSC handshake is + * switched from completion mode to buffer available mode. No + * point in issuing this if DSC overlap isn't supported, some + * drives (Seagate STT3401A) will return an error. + */ + if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { + int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ + : REQ_IDETAPE_WRITE; + + rc = idetape_queue_rw_tail(drive, cmd, 0); + if (rc < 0) { + kfree(tape->buf); + tape->buf = NULL; + tape->chrdev_dir = IDETAPE_DIR_NONE; + return rc; + } + } - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks, - tape->merge_bh); + return 0; } static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; - struct idetape_bh *bh; - int blocks; + + memset(tape->buf, 0, tape->buffer_size); while (bcount) { - unsigned int count; + unsigned int count = min(tape->buffer_size, bcount); - bh = tape->merge_bh; - count = min(tape->buffer_size, bcount); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count); bcount -= count; - blocks = count / tape->blk_size; - while (count) { - atomic_set(&bh->b_count, - min(count, (unsigned int)bh->b_size)); - memset(bh->b_data, 0, atomic_read(&bh->b_count)); - count -= atomic_read(&bh->b_count); - bh = bh->b_reqnext; - } - idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks, - tape->merge_bh); } } @@ -1456,7 +1112,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, } if (tape->chrdev_dir == IDETAPE_DIR_READ) { - tape->merge_bh_size = 0; + tape->valid = 0; if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) ++count; ide_tape_discard_merge_buffer(drive, 0); @@ -1505,9 +1161,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t bytes_read, temp, actually_read = 0, rc; + size_t done = 0; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; + int rc; debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); @@ -1517,49 +1173,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, (count % tape->blk_size) == 0) tape->user_bs_factor = count / tape->blk_size; } - rc = idetape_init_read(drive); + + rc = idetape_init_rw(drive, IDETAPE_DIR_READ); if (rc < 0) return rc; - if (count == 0) - return (0); - if (tape->merge_bh_size) { - actually_read = min((unsigned int)(tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_to_user(tape, buf, actually_read)) - ret = -EFAULT; - buf += actually_read; - tape->merge_bh_size -= actually_read; - count -= actually_read; - } - while (count >= tape->buffer_size) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); - if (bytes_read <= 0) - goto finish; - if (idetape_copy_stage_to_user(tape, buf, bytes_read)) - ret = -EFAULT; - buf += bytes_read; - count -= bytes_read; - actually_read += bytes_read; - } - if (count) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); - if (bytes_read <= 0) - goto finish; - temp = min((unsigned long)count, (unsigned long)bytes_read); - if (idetape_copy_stage_to_user(tape, buf, temp)) + + while (done < count) { + size_t todo; + + /* refill if staging buffer is empty */ + if (!tape->valid) { + /* If we are at a filemark, nothing more to read */ + if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) + break; + /* read */ + if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, + tape->buffer_size) <= 0) + break; + } + + /* copy out */ + todo = min_t(size_t, count - done, tape->valid); + if (copy_to_user(buf + done, tape->cur, todo)) ret = -EFAULT; - actually_read += temp; - tape->merge_bh_size = bytes_read-temp; + + tape->cur += todo; + tape->valid -= todo; + done += todo; } -finish: - if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { + + if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name); idetape_space_over_filemarks(drive, MTFSF, 1); return 0; } - return ret ? ret : actually_read; + return ret ? ret : done; } static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, @@ -1567,9 +1217,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t actually_written = 0; + size_t done = 0; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; + int rc; /* The drive is write protected. */ if (tape->write_prot) @@ -1578,80 +1228,31 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); /* Initialize write operation */ - if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { - if (tape->chrdev_dir == IDETAPE_DIR_READ) - ide_tape_discard_merge_buffer(drive, 1); - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size " - "should be 0 now\n"); - tape->merge_bh_size = 0; - } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); - if (!tape->merge_bh) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_WRITE; - idetape_init_merge_buffer(tape); + rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE); + if (rc < 0) + return rc; - /* - * Issue a write 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. No - * point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - ssize_t retval = idetape_queue_rw_tail(drive, - REQ_IDETAPE_WRITE, 0, - tape->merge_bh); - if (retval < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return retval; - } - } - } - if (count == 0) - return (0); - if (tape->merge_bh_size) { - if (tape->merge_bh_size >= tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge buf too big\n"); - tape->merge_bh_size = 0; - } - actually_written = min((unsigned int) - (tape->buffer_size - tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_from_user(tape, buf, actually_written)) - ret = -EFAULT; - buf += actually_written; - tape->merge_bh_size += actually_written; - count -= actually_written; - - if (tape->merge_bh_size == tape->buffer_size) { - ssize_t retval; - tape->merge_bh_size = 0; - retval = idetape_add_chrdev_write_request(drive, ctl); - if (retval <= 0) - return (retval); - } - } - while (count >= tape->buffer_size) { - ssize_t retval; - if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size)) - ret = -EFAULT; - buf += tape->buffer_size; - count -= tape->buffer_size; - retval = idetape_add_chrdev_write_request(drive, ctl); - actually_written += tape->buffer_size; - if (retval <= 0) - return (retval); - } - if (count) { - actually_written += count; - if (idetape_copy_stage_from_user(tape, buf, count)) + while (done < count) { + size_t todo; + + /* flush if staging buffer is full */ + if (tape->valid == tape->buffer_size && + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, + tape->buffer_size) <= 0) + return rc; + + /* copy in */ + todo = min_t(size_t, count - done, + tape->buffer_size - tape->valid); + if (copy_from_user(tape->cur, buf + done, todo)) ret = -EFAULT; - tape->merge_bh_size += count; + + tape->cur += todo; + tape->valid += todo; + done += todo; } - return ret ? ret : actually_written; + + return ret ? ret : done; } static int idetape_write_filemark(ide_drive_t *drive) @@ -1812,7 +1413,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file, idetape_flush_tape_buffers(drive); } if (cmd == MTIOCGET || cmd == MTIOCPOS) { - block_offset = tape->merge_bh_size / + block_offset = tape->valid / (tape->blk_size * tape->user_bs_factor); position = idetape_read_position(drive); if (position < 0) @@ -1960,12 +1561,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor) idetape_tape_t *tape = drive->driver_data; ide_tape_flush_merge_buffer(drive); - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0); - if (tape->merge_bh != NULL) { + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (tape->buf != NULL) { idetape_pad_zeros(drive, tape->blk_size * (tape->user_bs_factor - 1)); - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; } idetape_write_filemark(drive); idetape_flush_tape_buffers(drive); @@ -2159,8 +1760,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) u16 *ctl = (u16 *)&tape->caps[12]; drive->pc_callback = ide_tape_callback; - drive->pc_update_buffers = idetape_update_buffers; - drive->pc_io_buffers = ide_tape_io_buffers; drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP; @@ -2191,11 +1790,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) tape->buffer_size = *ctl * tape->blk_size; } buffer_size = tape->buffer_size; - tape->pages_per_buffer = buffer_size / PAGE_SIZE; - if (buffer_size % PAGE_SIZE) { - tape->pages_per_buffer++; - tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE; - } /* select the "best" DSC read/write polling freq */ speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]); @@ -2238,7 +1832,7 @@ static void ide_tape_release(struct device *dev) ide_drive_t *drive = tape->drive; struct gendisk *g = tape->disk; - BUG_ON(tape->merge_bh_size); + BUG_ON(tape->valid); drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP; drive->driver_data = NULL; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 4aa6223c11be..f400eb4d4aff 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->buffer = buf; + + if (cmd->tf_flags & IDE_TFLAG_WRITE) + rq->cmd_flags |= REQ_RW; /* * (ks) We transfer currently only whole sectors. @@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, * if we would find a solution to transfer any size. * To support special commands like READ LONG. */ - rq->hard_nr_sectors = rq->nr_sectors = nsect; - rq->hard_cur_sectors = rq->current_nr_sectors = nsect; - - if (cmd->tf_flags & IDE_TFLAG_WRITE) - rq->cmd_flags |= REQ_RW; + if (nsect) { + error = blk_rq_map_kern(drive->queue, rq, buf, + nsect * SECTOR_SIZE, __GFP_WAIT); + if (error) + goto put_req; + } rq->special = cmd; cmd->rq = rq; error = blk_execute_rq(drive->queue, NULL, rq, 0); - blk_put_request(rq); +put_req: + blk_put_request(rq); return error; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 424f7b048c30..3fd8b1e65483 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -20,7 +20,8 @@ #include <linux/idr.h> #include <linux/hdreg.h> #include <linux/blktrace_api.h> -#include <trace/block.h> + +#include <trace/events/block.h> #define DM_MSG_PREFIX "core" @@ -53,8 +54,6 @@ struct dm_target_io { union map_info info; }; -DEFINE_TRACE(block_bio_complete); - /* * For request-based dm. * One of these is allocated per request. @@ -656,8 +655,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* the bio has been remapped so dispatch it */ trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, - clone->bi_sector, sector); + tio->io->bio->bi_bdev->bd_dev, sector); generic_make_request(clone); } else if (r < 0 || r == DM_MAPIO_REQUEUE) { diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 4e63cc9e2778..151bf5bc8afe 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1,5 +1,5 @@ /* Low-level parallel-port routines for 8255-based PC-style hardware. - * + * * Authors: Phil Blundell <philb@gnu.org> * Tim Waugh <tim@cyberelk.demon.co.uk> * Jose Renau <renau@acm.org> @@ -11,7 +11,7 @@ * Cleaned up include files - Russell King <linux@arm.uk.linux.org> * DMA support - Bert De Jonghe <bert@sophis.be> * Many ECP bugs fixed. Fred Barnes & Jamie Lokier, 1999 - * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. + * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. * Various hacks, Fred Barnes, 04/2001 * Updated probing logic - Adam Belay <ambx1@neo.rr.com> */ @@ -56,10 +56,10 @@ #include <linux/pnp.h> #include <linux/platform_device.h> #include <linux/sysctl.h> +#include <linux/io.h> +#include <linux/uaccess.h> -#include <asm/io.h> #include <asm/dma.h> -#include <asm/uaccess.h> #include <linux/parport.h> #include <linux/parport_pc.h> @@ -82,7 +82,7 @@ #define ECR_TST 06 #define ECR_CNF 07 #define ECR_MODE_MASK 0xe0 -#define ECR_WRITE(p,v) frob_econtrol((p),0xff,(v)) +#define ECR_WRITE(p, v) frob_econtrol((p), 0xff, (v)) #undef DEBUG @@ -109,27 +109,27 @@ static int pci_registered_parport; static int pnp_registered_parport; /* frob_control, but for ECR */ -static void frob_econtrol (struct parport *pb, unsigned char m, +static void frob_econtrol(struct parport *pb, unsigned char m, unsigned char v) { unsigned char ectr = 0; if (m != 0xff) - ectr = inb (ECONTROL (pb)); + ectr = inb(ECONTROL(pb)); - DPRINTK (KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n", + DPRINTK(KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n", m, v, ectr, (ectr & ~m) ^ v); - outb ((ectr & ~m) ^ v, ECONTROL (pb)); + outb((ectr & ~m) ^ v, ECONTROL(pb)); } -static __inline__ void frob_set_mode (struct parport *p, int mode) +static inline void frob_set_mode(struct parport *p, int mode) { - frob_econtrol (p, ECR_MODE_MASK, mode << 5); + frob_econtrol(p, ECR_MODE_MASK, mode << 5); } #ifdef CONFIG_PARPORT_PC_FIFO -/* Safely change the mode bits in the ECR +/* Safely change the mode bits in the ECR Returns: 0 : Success -EBUSY: Could not drain FIFO in some finite amount of time, @@ -141,17 +141,18 @@ static int change_mode(struct parport *p, int m) unsigned char oecr; int mode; - DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n",m); + DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n", m); if (!priv->ecr) { - printk (KERN_DEBUG "change_mode: but there's no ECR!\n"); + printk(KERN_DEBUG "change_mode: but there's no ECR!\n"); return 0; } /* Bits <7:5> contain the mode. */ - oecr = inb (ECONTROL (p)); + oecr = inb(ECONTROL(p)); mode = (oecr >> 5) & 0x7; - if (mode == m) return 0; + if (mode == m) + return 0; if (mode >= 2 && !(priv->ctr & 0x20)) { /* This mode resets the FIFO, so we may @@ -163,19 +164,21 @@ static int change_mode(struct parport *p, int m) case ECR_ECP: /* ECP Parallel Port mode */ /* Busy wait for 200us */ for (counter = 0; counter < 40; counter++) { - if (inb (ECONTROL (p)) & 0x01) + if (inb(ECONTROL(p)) & 0x01) + break; + if (signal_pending(current)) break; - if (signal_pending (current)) break; - udelay (5); + udelay(5); } /* Poll slowly. */ - while (!(inb (ECONTROL (p)) & 0x01)) { - if (time_after_eq (jiffies, expire)) + while (!(inb(ECONTROL(p)) & 0x01)) { + if (time_after_eq(jiffies, expire)) /* The FIFO is stuck. */ return -EBUSY; - schedule_timeout_interruptible(msecs_to_jiffies(10)); - if (signal_pending (current)) + schedule_timeout_interruptible( + msecs_to_jiffies(10)); + if (signal_pending(current)) break; } } @@ -185,20 +188,20 @@ static int change_mode(struct parport *p, int m) /* We have to go through mode 001 */ oecr &= ~(7 << 5); oecr |= ECR_PS2 << 5; - ECR_WRITE (p, oecr); + ECR_WRITE(p, oecr); } /* Set the mode. */ oecr &= ~(7 << 5); oecr |= m << 5; - ECR_WRITE (p, oecr); + ECR_WRITE(p, oecr); return 0; } #ifdef CONFIG_PARPORT_1284 /* Find FIFO lossage; FIFO is reset */ #if 0 -static int get_fifo_residue (struct parport *p) +static int get_fifo_residue(struct parport *p) { int residue; int cnfga; @@ -206,26 +209,26 @@ static int get_fifo_residue (struct parport *p) /* Adjust for the contents of the FIFO. */ for (residue = priv->fifo_depth; ; residue--) { - if (inb (ECONTROL (p)) & 0x2) + if (inb(ECONTROL(p)) & 0x2) /* Full up. */ break; - outb (0, FIFO (p)); + outb(0, FIFO(p)); } - printk (KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name, + printk(KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name, residue); /* Reset the FIFO. */ - frob_set_mode (p, ECR_PS2); + frob_set_mode(p, ECR_PS2); /* Now change to config mode and clean up. FIXME */ - frob_set_mode (p, ECR_CNF); - cnfga = inb (CONFIGA (p)); - printk (KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga); + frob_set_mode(p, ECR_CNF); + cnfga = inb(CONFIGA(p)); + printk(KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga); if (!(cnfga & (1<<2))) { - printk (KERN_DEBUG "%s: Accounting for extra byte\n", p->name); + printk(KERN_DEBUG "%s: Accounting for extra byte\n", p->name); residue++; } @@ -233,9 +236,11 @@ static int get_fifo_residue (struct parport *p) * PWord != 1 byte. */ /* Back to PS2 mode. */ - frob_set_mode (p, ECR_PS2); + frob_set_mode(p, ECR_PS2); - DPRINTK (KERN_DEBUG "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", inb (ECONTROL (p))); + DPRINTK(KERN_DEBUG + "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", + inb(ECONTROL(p))); return residue; } #endif /* 0 */ @@ -257,8 +262,8 @@ static int clear_epp_timeout(struct parport *pb) /* To clear timeout some chips require double read */ parport_pc_read_status(pb); r = parport_pc_read_status(pb); - outb (r | 0x01, STATUS (pb)); /* Some reset by writing 1 */ - outb (r & 0xfe, STATUS (pb)); /* Others by writing 0 */ + outb(r | 0x01, STATUS(pb)); /* Some reset by writing 1 */ + outb(r & 0xfe, STATUS(pb)); /* Others by writing 0 */ r = parport_pc_read_status(pb); return !(r & 0x01); @@ -272,7 +277,8 @@ static int clear_epp_timeout(struct parport *pb) * of these are in parport_pc.h. */ -static void parport_pc_init_state(struct pardevice *dev, struct parport_state *s) +static void parport_pc_init_state(struct pardevice *dev, + struct parport_state *s) { s->u.pc.ctr = 0xc; if (dev->irq_func && @@ -289,22 +295,23 @@ static void parport_pc_save_state(struct parport *p, struct parport_state *s) const struct parport_pc_private *priv = p->physport->private_data; s->u.pc.ctr = priv->ctr; if (priv->ecr) - s->u.pc.ecr = inb (ECONTROL (p)); + s->u.pc.ecr = inb(ECONTROL(p)); } -static void parport_pc_restore_state(struct parport *p, struct parport_state *s) +static void parport_pc_restore_state(struct parport *p, + struct parport_state *s) { struct parport_pc_private *priv = p->physport->private_data; register unsigned char c = s->u.pc.ctr & priv->ctr_writable; - outb (c, CONTROL (p)); + outb(c, CONTROL(p)); priv->ctr = c; if (priv->ecr) - ECR_WRITE (p, s->u.pc.ecr); + ECR_WRITE(p, s->u.pc.ecr); } #ifdef CONFIG_PARPORT_1284 -static size_t parport_pc_epp_read_data (struct parport *port, void *buf, - size_t length, int flags) +static size_t parport_pc_epp_read_data(struct parport *port, void *buf, + size_t length, int flags) { size_t got = 0; @@ -316,54 +323,52 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf, * nFault is 0 if there is at least 1 byte in the Warp's FIFO * pError is 1 if there are 16 bytes in the Warp's FIFO */ - status = inb (STATUS (port)); + status = inb(STATUS(port)); - while (!(status & 0x08) && (got < length)) { - if ((left >= 16) && (status & 0x20) && !(status & 0x08)) { + while (!(status & 0x08) && got < length) { + if (left >= 16 && (status & 0x20) && !(status & 0x08)) { /* can grab 16 bytes from warp fifo */ - if (!((long)buf & 0x03)) { - insl (EPPDATA (port), buf, 4); - } else { - insb (EPPDATA (port), buf, 16); - } + if (!((long)buf & 0x03)) + insl(EPPDATA(port), buf, 4); + else + insb(EPPDATA(port), buf, 16); buf += 16; got += 16; left -= 16; } else { /* grab single byte from the warp fifo */ - *((char *)buf) = inb (EPPDATA (port)); + *((char *)buf) = inb(EPPDATA(port)); buf++; got++; left--; } - status = inb (STATUS (port)); + status = inb(STATUS(port)); if (status & 0x01) { /* EPP timeout should never occur... */ - printk (KERN_DEBUG "%s: EPP timeout occurred while talking to " - "w91284pic (should not have done)\n", port->name); - clear_epp_timeout (port); + printk(KERN_DEBUG +"%s: EPP timeout occurred while talking to w91284pic (should not have done)\n", port->name); + clear_epp_timeout(port); } } return got; } if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - if (!(((long)buf | length) & 0x03)) { - insl (EPPDATA (port), buf, (length >> 2)); - } else { - insb (EPPDATA (port), buf, length); - } - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (!(((long)buf | length) & 0x03)) + insl(EPPDATA(port), buf, (length >> 2)); + else + insb(EPPDATA(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; got < length; got++) { - *((char*)buf) = inb (EPPDATA(port)); + *((char *)buf) = inb(EPPDATA(port)); buf++; - if (inb (STATUS (port)) & 0x01) { + if (inb(STATUS(port)) & 0x01) { /* EPP timeout */ - clear_epp_timeout (port); + clear_epp_timeout(port); break; } } @@ -371,28 +376,27 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf, return got; } -static size_t parport_pc_epp_write_data (struct parport *port, const void *buf, - size_t length, int flags) +static size_t parport_pc_epp_write_data(struct parport *port, const void *buf, + size_t length, int flags) { size_t written = 0; if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - if (!(((long)buf | length) & 0x03)) { - outsl (EPPDATA (port), buf, (length >> 2)); - } else { - outsb (EPPDATA (port), buf, length); - } - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (!(((long)buf | length) & 0x03)) + outsl(EPPDATA(port), buf, (length >> 2)); + else + outsb(EPPDATA(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; written < length; written++) { - outb (*((char*)buf), EPPDATA(port)); + outb(*((char *)buf), EPPDATA(port)); buf++; - if (inb (STATUS(port)) & 0x01) { - clear_epp_timeout (port); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); break; } } @@ -400,24 +404,24 @@ static size_t parport_pc_epp_write_data (struct parport *port, const void *buf, return written; } -static size_t parport_pc_epp_read_addr (struct parport *port, void *buf, +static size_t parport_pc_epp_read_addr(struct parport *port, void *buf, size_t length, int flags) { size_t got = 0; if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - insb (EPPADDR (port), buf, length); - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + insb(EPPADDR(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; got < length; got++) { - *((char*)buf) = inb (EPPADDR (port)); + *((char *)buf) = inb(EPPADDR(port)); buf++; - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); break; } } @@ -425,25 +429,25 @@ static size_t parport_pc_epp_read_addr (struct parport *port, void *buf, return got; } -static size_t parport_pc_epp_write_addr (struct parport *port, +static size_t parport_pc_epp_write_addr(struct parport *port, const void *buf, size_t length, int flags) { size_t written = 0; if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - outsb (EPPADDR (port), buf, length); - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + outsb(EPPADDR(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; written < length; written++) { - outb (*((char*)buf), EPPADDR (port)); + outb(*((char *)buf), EPPADDR(port)); buf++; - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); break; } } @@ -451,74 +455,74 @@ static size_t parport_pc_epp_write_addr (struct parport *port, return written; } -static size_t parport_pc_ecpepp_read_data (struct parport *port, void *buf, - size_t length, int flags) +static size_t parport_pc_ecpepp_read_data(struct parport *port, void *buf, + size_t length, int flags) { size_t got; - frob_set_mode (port, ECR_EPP); - parport_pc_data_reverse (port); - parport_pc_write_control (port, 0x4); - got = parport_pc_epp_read_data (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_data_reverse(port); + parport_pc_write_control(port, 0x4); + got = parport_pc_epp_read_data(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return got; } -static size_t parport_pc_ecpepp_write_data (struct parport *port, - const void *buf, size_t length, - int flags) +static size_t parport_pc_ecpepp_write_data(struct parport *port, + const void *buf, size_t length, + int flags) { size_t written; - frob_set_mode (port, ECR_EPP); - parport_pc_write_control (port, 0x4); - parport_pc_data_forward (port); - written = parport_pc_epp_write_data (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_write_control(port, 0x4); + parport_pc_data_forward(port); + written = parport_pc_epp_write_data(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return written; } -static size_t parport_pc_ecpepp_read_addr (struct parport *port, void *buf, - size_t length, int flags) +static size_t parport_pc_ecpepp_read_addr(struct parport *port, void *buf, + size_t length, int flags) { size_t got; - frob_set_mode (port, ECR_EPP); - parport_pc_data_reverse (port); - parport_pc_write_control (port, 0x4); - got = parport_pc_epp_read_addr (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_data_reverse(port); + parport_pc_write_control(port, 0x4); + got = parport_pc_epp_read_addr(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return got; } -static size_t parport_pc_ecpepp_write_addr (struct parport *port, +static size_t parport_pc_ecpepp_write_addr(struct parport *port, const void *buf, size_t length, int flags) { size_t written; - frob_set_mode (port, ECR_EPP); - parport_pc_write_control (port, 0x4); - parport_pc_data_forward (port); - written = parport_pc_epp_write_addr (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_write_control(port, 0x4); + parport_pc_data_forward(port); + written = parport_pc_epp_write_addr(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return written; } #endif /* IEEE 1284 support */ #ifdef CONFIG_PARPORT_PC_FIFO -static size_t parport_pc_fifo_write_block_pio (struct parport *port, +static size_t parport_pc_fifo_write_block_pio(struct parport *port, const void *buf, size_t length) { int ret = 0; const unsigned char *bufp = buf; size_t left = length; unsigned long expire = jiffies + port->physport->cad->timeout; - const int fifo = FIFO (port); + const int fifo = FIFO(port); int poll_for = 8; /* 80 usecs */ const struct parport_pc_private *priv = port->physport->private_data; const int fifo_depth = priv->fifo_depth; @@ -526,25 +530,25 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port, port = port->physport; /* We don't want to be interrupted every character. */ - parport_pc_disable_irq (port); + parport_pc_disable_irq(port); /* set nErrIntrEn and serviceIntr */ - frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2)); + frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2)); /* Forward mode. */ - parport_pc_data_forward (port); /* Must be in PS2 mode */ + parport_pc_data_forward(port); /* Must be in PS2 mode */ while (left) { unsigned char byte; - unsigned char ecrval = inb (ECONTROL (port)); + unsigned char ecrval = inb(ECONTROL(port)); int i = 0; - if (need_resched() && time_before (jiffies, expire)) + if (need_resched() && time_before(jiffies, expire)) /* Can't yield the port. */ - schedule (); + schedule(); /* Anyone else waiting for the port? */ if (port->waithead) { - printk (KERN_DEBUG "Somebody wants the port\n"); + printk(KERN_DEBUG "Somebody wants the port\n"); break; } @@ -552,21 +556,22 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port, /* FIFO is full. Wait for interrupt. */ /* Clear serviceIntr */ - ECR_WRITE (port, ecrval & ~(1<<2)); - false_alarm: - ret = parport_wait_event (port, HZ); - if (ret < 0) break; + ECR_WRITE(port, ecrval & ~(1<<2)); +false_alarm: + ret = parport_wait_event(port, HZ); + if (ret < 0) + break; ret = 0; - if (!time_before (jiffies, expire)) { + if (!time_before(jiffies, expire)) { /* Timed out. */ - printk (KERN_DEBUG "FIFO write timed out\n"); + printk(KERN_DEBUG "FIFO write timed out\n"); break; } - ecrval = inb (ECONTROL (port)); + ecrval = inb(ECONTROL(port)); if (!(ecrval & (1<<2))) { if (need_resched() && - time_before (jiffies, expire)) - schedule (); + time_before(jiffies, expire)) + schedule(); goto false_alarm; } @@ -577,38 +582,38 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port, /* Can't fail now. */ expire = jiffies + port->cad->timeout; - poll: - if (signal_pending (current)) +poll: + if (signal_pending(current)) break; if (ecrval & 0x01) { /* FIFO is empty. Blast it full. */ const int n = left < fifo_depth ? left : fifo_depth; - outsb (fifo, bufp, n); + outsb(fifo, bufp, n); bufp += n; left -= n; /* Adjust the poll time. */ - if (i < (poll_for - 2)) poll_for--; + if (i < (poll_for - 2)) + poll_for--; continue; } else if (i++ < poll_for) { - udelay (10); - ecrval = inb (ECONTROL (port)); + udelay(10); + ecrval = inb(ECONTROL(port)); goto poll; } - /* Half-full (call me an optimist) */ + /* Half-full(call me an optimist) */ byte = *bufp++; - outb (byte, fifo); + outb(byte, fifo); left--; - } - -dump_parport_state ("leave fifo_write_block_pio", port); + } + dump_parport_state("leave fifo_write_block_pio", port); return length - left; } #ifdef HAS_DMA -static size_t parport_pc_fifo_write_block_dma (struct parport *port, +static size_t parport_pc_fifo_write_block_dma(struct parport *port, const void *buf, size_t length) { int ret = 0; @@ -621,7 +626,7 @@ static size_t parport_pc_fifo_write_block_dma (struct parport *port, unsigned long start = (unsigned long) buf; unsigned long end = (unsigned long) buf + length - 1; -dump_parport_state ("enter fifo_write_block_dma", port); + dump_parport_state("enter fifo_write_block_dma", port); if (end < MAX_DMA_ADDRESS) { /* If it would cross a 64k boundary, cap it at the end. */ if ((start ^ end) & ~0xffffUL) @@ -629,8 +634,9 @@ dump_parport_state ("enter fifo_write_block_dma", port); dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length, DMA_TO_DEVICE); - } else { - /* above 16 MB we use a bounce buffer as ISA-DMA is not possible */ + } else { + /* above 16 MB we use a bounce buffer as ISA-DMA + is not possible */ maxlen = PAGE_SIZE; /* sizeof(priv->dma_buf) */ dma_addr = priv->dma_handle; dma_handle = 0; @@ -639,12 +645,12 @@ dump_parport_state ("enter fifo_write_block_dma", port); port = port->physport; /* We don't want to be interrupted every character. */ - parport_pc_disable_irq (port); + parport_pc_disable_irq(port); /* set nErrIntrEn and serviceIntr */ - frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2)); + frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2)); /* Forward mode. */ - parport_pc_data_forward (port); /* Must be in PS2 mode */ + parport_pc_data_forward(port); /* Must be in PS2 mode */ while (left) { unsigned long expire = jiffies + port->physport->cad->timeout; @@ -665,10 +671,10 @@ dump_parport_state ("enter fifo_write_block_dma", port); set_dma_count(port->dma, count); /* Set DMA mode */ - frob_econtrol (port, 1<<3, 1<<3); + frob_econtrol(port, 1<<3, 1<<3); /* Clear serviceIntr */ - frob_econtrol (port, 1<<2, 0); + frob_econtrol(port, 1<<2, 0); enable_dma(port->dma); release_dma_lock(dmaflag); @@ -676,20 +682,22 @@ dump_parport_state ("enter fifo_write_block_dma", port); /* assume DMA will be successful */ left -= count; buf += count; - if (dma_handle) dma_addr += count; + if (dma_handle) + dma_addr += count; /* Wait for interrupt. */ - false_alarm: - ret = parport_wait_event (port, HZ); - if (ret < 0) break; +false_alarm: + ret = parport_wait_event(port, HZ); + if (ret < 0) + break; ret = 0; - if (!time_before (jiffies, expire)) { + if (!time_before(jiffies, expire)) { /* Timed out. */ - printk (KERN_DEBUG "DMA write timed out\n"); + printk(KERN_DEBUG "DMA write timed out\n"); break; } /* Is serviceIntr set? */ - if (!(inb (ECONTROL (port)) & (1<<2))) { + if (!(inb(ECONTROL(port)) & (1<<2))) { cond_resched(); goto false_alarm; @@ -705,14 +713,15 @@ dump_parport_state ("enter fifo_write_block_dma", port); /* Anyone else waiting for the port? */ if (port->waithead) { - printk (KERN_DEBUG "Somebody wants the port\n"); + printk(KERN_DEBUG "Somebody wants the port\n"); break; } /* update for possible DMA residue ! */ buf -= count; left += count; - if (dma_handle) dma_addr -= count; + if (dma_handle) + dma_addr -= count; } /* Maybe got here through break, so adjust for DMA residue! */ @@ -723,12 +732,12 @@ dump_parport_state ("enter fifo_write_block_dma", port); release_dma_lock(dmaflag); /* Turn off DMA mode */ - frob_econtrol (port, 1<<3, 0); + frob_econtrol(port, 1<<3, 0); if (dma_handle) dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE); -dump_parport_state ("leave fifo_write_block_dma", port); + dump_parport_state("leave fifo_write_block_dma", port); return length - left; } #endif @@ -738,13 +747,13 @@ static inline size_t parport_pc_fifo_write_block(struct parport *port, { #ifdef HAS_DMA if (port->dma != PARPORT_DMA_NONE) - return parport_pc_fifo_write_block_dma (port, buf, length); + return parport_pc_fifo_write_block_dma(port, buf, length); #endif - return parport_pc_fifo_write_block_pio (port, buf, length); + return parport_pc_fifo_write_block_pio(port, buf, length); } /* Parallel Port FIFO mode (ECP chipsets) */ -static size_t parport_pc_compat_write_block_pio (struct parport *port, +static size_t parport_pc_compat_write_block_pio(struct parport *port, const void *buf, size_t length, int flags) { @@ -756,14 +765,16 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port, /* Special case: a timeout of zero means we cannot call schedule(). * Also if O_NONBLOCK is set then use the default implementation. */ if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) - return parport_ieee1284_write_compat (port, buf, + return parport_ieee1284_write_compat(port, buf, length, flags); /* Set up parallel port FIFO mode.*/ - parport_pc_data_forward (port); /* Must be in PS2 mode */ - parport_pc_frob_control (port, PARPORT_CONTROL_STROBE, 0); - r = change_mode (port, ECR_PPF); /* Parallel port FIFO */ - if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", port->name); + parport_pc_data_forward(port); /* Must be in PS2 mode */ + parport_pc_frob_control(port, PARPORT_CONTROL_STROBE, 0); + r = change_mode(port, ECR_PPF); /* Parallel port FIFO */ + if (r) + printk(KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", + port->name); port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA; @@ -775,40 +786,39 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port, * the FIFO is empty, so allow 4 seconds for each position * in the fifo. */ - expire = jiffies + (priv->fifo_depth * HZ * 4); + expire = jiffies + (priv->fifo_depth * HZ * 4); do { /* Wait for the FIFO to empty */ - r = change_mode (port, ECR_PS2); - if (r != -EBUSY) { + r = change_mode(port, ECR_PS2); + if (r != -EBUSY) break; - } - } while (time_before (jiffies, expire)); + } while (time_before(jiffies, expire)); if (r == -EBUSY) { - printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name); + printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name); /* Prevent further data transfer. */ - frob_set_mode (port, ECR_TST); + frob_set_mode(port, ECR_TST); /* Adjust for the contents of the FIFO. */ for (written -= priv->fifo_depth; ; written++) { - if (inb (ECONTROL (port)) & 0x2) { + if (inb(ECONTROL(port)) & 0x2) { /* Full up. */ break; } - outb (0, FIFO (port)); + outb(0, FIFO(port)); } /* Reset the FIFO and return to PS2 mode. */ - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_PS2); } - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_BUSY, PARPORT_STATUS_BUSY); if (r) - printk (KERN_DEBUG - "%s: BUSY timeout (%d) in compat_write_block_pio\n", + printk(KERN_DEBUG + "%s: BUSY timeout (%d) in compat_write_block_pio\n", port->name, r); port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE; @@ -818,7 +828,7 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port, /* ECP */ #ifdef CONFIG_PARPORT_1284 -static size_t parport_pc_ecp_write_block_pio (struct parport *port, +static size_t parport_pc_ecp_write_block_pio(struct parport *port, const void *buf, size_t length, int flags) { @@ -830,36 +840,38 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port, /* Special case: a timeout of zero means we cannot call schedule(). * Also if O_NONBLOCK is set then use the default implementation. */ if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) - return parport_ieee1284_ecp_write_data (port, buf, + return parport_ieee1284_ecp_write_data(port, buf, length, flags); /* Switch to forward mode if necessary. */ if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) { /* Event 47: Set nInit high. */ - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_INIT | PARPORT_CONTROL_AUTOFD, PARPORT_CONTROL_INIT | PARPORT_CONTROL_AUTOFD); /* Event 49: PError goes high. */ - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, PARPORT_STATUS_PAPEROUT); if (r) { - printk (KERN_DEBUG "%s: PError timeout (%d) " + printk(KERN_DEBUG "%s: PError timeout (%d) " "in ecp_write_block_pio\n", port->name, r); } } /* Set up ECP parallel port mode.*/ - parport_pc_data_forward (port); /* Must be in PS2 mode */ - parport_pc_frob_control (port, + parport_pc_data_forward(port); /* Must be in PS2 mode */ + parport_pc_frob_control(port, PARPORT_CONTROL_STROBE | PARPORT_CONTROL_AUTOFD, 0); - r = change_mode (port, ECR_ECP); /* ECP FIFO */ - if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name); + r = change_mode(port, ECR_ECP); /* ECP FIFO */ + if (r) + printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", + port->name); port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA; /* Write the data to the FIFO. */ @@ -873,55 +885,54 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port, expire = jiffies + (priv->fifo_depth * (HZ * 4)); do { /* Wait for the FIFO to empty */ - r = change_mode (port, ECR_PS2); - if (r != -EBUSY) { + r = change_mode(port, ECR_PS2); + if (r != -EBUSY) break; - } - } while (time_before (jiffies, expire)); + } while (time_before(jiffies, expire)); if (r == -EBUSY) { - printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name); + printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name); /* Prevent further data transfer. */ - frob_set_mode (port, ECR_TST); + frob_set_mode(port, ECR_TST); /* Adjust for the contents of the FIFO. */ for (written -= priv->fifo_depth; ; written++) { - if (inb (ECONTROL (port)) & 0x2) { + if (inb(ECONTROL(port)) & 0x2) { /* Full up. */ break; } - outb (0, FIFO (port)); + outb(0, FIFO(port)); } /* Reset the FIFO and return to PS2 mode. */ - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_PS2); /* Host transfer recovery. */ - parport_pc_data_reverse (port); /* Must be in PS2 mode */ - udelay (5); - parport_frob_control (port, PARPORT_CONTROL_INIT, 0); - r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0); + parport_pc_data_reverse(port); /* Must be in PS2 mode */ + udelay(5); + parport_frob_control(port, PARPORT_CONTROL_INIT, 0); + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0); if (r) - printk (KERN_DEBUG "%s: PE,1 timeout (%d) " + printk(KERN_DEBUG "%s: PE,1 timeout (%d) " "in ecp_write_block_pio\n", port->name, r); - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT); - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, PARPORT_STATUS_PAPEROUT); - if (r) - printk (KERN_DEBUG "%s: PE,2 timeout (%d) " + if (r) + printk(KERN_DEBUG "%s: PE,2 timeout (%d) " "in ecp_write_block_pio\n", port->name, r); } - r = parport_wait_peripheral (port, - PARPORT_STATUS_BUSY, + r = parport_wait_peripheral(port, + PARPORT_STATUS_BUSY, PARPORT_STATUS_BUSY); - if(r) - printk (KERN_DEBUG + if (r) + printk(KERN_DEBUG "%s: BUSY timeout (%d) in ecp_write_block_pio\n", port->name, r); @@ -931,7 +942,7 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port, } #if 0 -static size_t parport_pc_ecp_read_block_pio (struct parport *port, +static size_t parport_pc_ecp_read_block_pio(struct parport *port, void *buf, size_t length, int flags) { @@ -944,13 +955,13 @@ static size_t parport_pc_ecp_read_block_pio (struct parport *port, char *bufp = buf; port = port->physport; -DPRINTK (KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n"); -dump_parport_state ("enter fcn", port); + DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n"); + dump_parport_state("enter fcn", port); /* Special case: a timeout of zero means we cannot call schedule(). * Also if O_NONBLOCK is set then use the default implementation. */ if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) - return parport_ieee1284_ecp_read_data (port, buf, + return parport_ieee1284_ecp_read_data(port, buf, length, flags); if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) { @@ -966,173 +977,178 @@ dump_parport_state ("enter fcn", port); * go through software emulation. Otherwise we may have to throw * away data. */ if (length < fifofull) - return parport_ieee1284_ecp_read_data (port, buf, + return parport_ieee1284_ecp_read_data(port, buf, length, flags); if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) { /* change to reverse-idle phase (must be in forward-idle) */ /* Event 38: Set nAutoFd low (also make sure nStrobe is high) */ - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_AUTOFD | PARPORT_CONTROL_STROBE, PARPORT_CONTROL_AUTOFD); - parport_pc_data_reverse (port); /* Must be in PS2 mode */ - udelay (5); + parport_pc_data_reverse(port); /* Must be in PS2 mode */ + udelay(5); /* Event 39: Set nInit low to initiate bus reversal */ - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_INIT, 0); /* Event 40: Wait for nAckReverse (PError) to go low */ - r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0); - if (r) { - printk (KERN_DEBUG "%s: PE timeout Event 40 (%d) " + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0); + if (r) { + printk(KERN_DEBUG "%s: PE timeout Event 40 (%d) " "in ecp_read_block_pio\n", port->name, r); return 0; } } /* Set up ECP FIFO mode.*/ -/* parport_pc_frob_control (port, +/* parport_pc_frob_control(port, PARPORT_CONTROL_STROBE | PARPORT_CONTROL_AUTOFD, PARPORT_CONTROL_AUTOFD); */ - r = change_mode (port, ECR_ECP); /* ECP FIFO */ - if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name); + r = change_mode(port, ECR_ECP); /* ECP FIFO */ + if (r) + printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", + port->name); port->ieee1284.phase = IEEE1284_PH_REV_DATA; /* the first byte must be collected manually */ -dump_parport_state ("pre 43", port); + dump_parport_state("pre 43", port); /* Event 43: Wait for nAck to go low */ - r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, 0); + r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, 0); if (r) { /* timed out while reading -- no data */ - printk (KERN_DEBUG "PIO read timed out (initial byte)\n"); + printk(KERN_DEBUG "PIO read timed out (initial byte)\n"); goto out_no_data; } /* read byte */ - *bufp++ = inb (DATA (port)); + *bufp++ = inb(DATA(port)); left--; -dump_parport_state ("43-44", port); + dump_parport_state("43-44", port); /* Event 44: nAutoFd (HostAck) goes high to acknowledge */ - parport_pc_frob_control (port, + parport_pc_frob_control(port, PARPORT_CONTROL_AUTOFD, 0); -dump_parport_state ("pre 45", port); + dump_parport_state("pre 45", port); /* Event 45: Wait for nAck to go high */ -/* r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, PARPORT_STATUS_ACK); */ -dump_parport_state ("post 45", port); -r = 0; + /* r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, + PARPORT_STATUS_ACK); */ + dump_parport_state("post 45", port); + r = 0; if (r) { /* timed out while waiting for peripheral to respond to ack */ - printk (KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n"); + printk(KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n"); /* keep hold of the byte we've got already */ goto out_no_data; } /* Event 46: nAutoFd (HostAck) goes low to accept more data */ - parport_pc_frob_control (port, + parport_pc_frob_control(port, PARPORT_CONTROL_AUTOFD, PARPORT_CONTROL_AUTOFD); -dump_parport_state ("rev idle", port); + dump_parport_state("rev idle", port); /* Do the transfer. */ while (left > fifofull) { int ret; unsigned long expire = jiffies + port->cad->timeout; - unsigned char ecrval = inb (ECONTROL (port)); + unsigned char ecrval = inb(ECONTROL(port)); - if (need_resched() && time_before (jiffies, expire)) + if (need_resched() && time_before(jiffies, expire)) /* Can't yield the port. */ - schedule (); + schedule(); /* At this point, the FIFO may already be full. In - * that case ECP is already holding back the - * peripheral (assuming proper design) with a delayed - * handshake. Work fast to avoid a peripheral - * timeout. */ + * that case ECP is already holding back the + * peripheral (assuming proper design) with a delayed + * handshake. Work fast to avoid a peripheral + * timeout. */ if (ecrval & 0x01) { /* FIFO is empty. Wait for interrupt. */ -dump_parport_state ("FIFO empty", port); + dump_parport_state("FIFO empty", port); /* Anyone else waiting for the port? */ if (port->waithead) { - printk (KERN_DEBUG "Somebody wants the port\n"); + printk(KERN_DEBUG "Somebody wants the port\n"); break; } /* Clear serviceIntr */ - ECR_WRITE (port, ecrval & ~(1<<2)); - false_alarm: -dump_parport_state ("waiting", port); - ret = parport_wait_event (port, HZ); -DPRINTK (KERN_DEBUG "parport_wait_event returned %d\n", ret); + ECR_WRITE(port, ecrval & ~(1<<2)); +false_alarm: + dump_parport_state("waiting", port); + ret = parport_wait_event(port, HZ); + DPRINTK(KERN_DEBUG "parport_wait_event returned %d\n", + ret); if (ret < 0) break; ret = 0; - if (!time_before (jiffies, expire)) { + if (!time_before(jiffies, expire)) { /* Timed out. */ -dump_parport_state ("timeout", port); - printk (KERN_DEBUG "PIO read timed out\n"); + dump_parport_state("timeout", port); + printk(KERN_DEBUG "PIO read timed out\n"); break; } - ecrval = inb (ECONTROL (port)); + ecrval = inb(ECONTROL(port)); if (!(ecrval & (1<<2))) { if (need_resched() && - time_before (jiffies, expire)) { - schedule (); + time_before(jiffies, expire)) { + schedule(); } goto false_alarm; } /* Depending on how the FIFO threshold was - * set, how long interrupt service took, and - * how fast the peripheral is, we might be - * lucky and have a just filled FIFO. */ + * set, how long interrupt service took, and + * how fast the peripheral is, we might be + * lucky and have a just filled FIFO. */ continue; } if (ecrval & 0x02) { /* FIFO is full. */ -dump_parport_state ("FIFO full", port); - insb (fifo, bufp, fifo_depth); + dump_parport_state("FIFO full", port); + insb(fifo, bufp, fifo_depth); bufp += fifo_depth; left -= fifo_depth; continue; } -DPRINTK (KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n"); + DPRINTK(KERN_DEBUG + "*** ecp_read_block_pio: reading one byte from the FIFO\n"); /* FIFO not filled. We will cycle this loop for a while - * and either the peripheral will fill it faster, - * tripping a fast empty with insb, or we empty it. */ - *bufp++ = inb (fifo); + * and either the peripheral will fill it faster, + * tripping a fast empty with insb, or we empty it. */ + *bufp++ = inb(fifo); left--; } /* scoop up anything left in the FIFO */ - while (left && !(inb (ECONTROL (port) & 0x01))) { - *bufp++ = inb (fifo); + while (left && !(inb(ECONTROL(port) & 0x01))) { + *bufp++ = inb(fifo); left--; } port->ieee1284.phase = IEEE1284_PH_REV_IDLE; -dump_parport_state ("rev idle2", port); + dump_parport_state("rev idle2", port); out_no_data: /* Go to forward idle mode to shut the peripheral up (event 47). */ - parport_frob_control (port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT); + parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT); /* event 49: PError goes high */ - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, PARPORT_STATUS_PAPEROUT); if (r) { - printk (KERN_DEBUG + printk(KERN_DEBUG "%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n", port->name, r); } @@ -1141,14 +1157,14 @@ out_no_data: /* Finish up. */ { - int lost = get_fifo_residue (port); + int lost = get_fifo_residue(port); if (lost) /* Shouldn't happen with compliant peripherals. */ - printk (KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n", + printk(KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n", port->name, lost); } -dump_parport_state ("fwd idle", port); + dump_parport_state("fwd idle", port); return length - left; } #endif /* 0 */ @@ -1164,8 +1180,7 @@ dump_parport_state ("fwd idle", port); /* GCC is not inlining extern inline function later overwriten to non-inline, so we use outlined_ variants here. */ -static const struct parport_operations parport_pc_ops = -{ +static const struct parport_operations parport_pc_ops = { .write_data = parport_pc_write_data, .read_data = parport_pc_read_data, @@ -1202,88 +1217,107 @@ static const struct parport_operations parport_pc_ops = }; #ifdef CONFIG_PARPORT_PC_SUPERIO + +static struct superio_struct *find_free_superio(void) +{ + int i; + for (i = 0; i < NR_SUPERIOS; i++) + if (superios[i].io == 0) + return &superios[i]; + return NULL; +} + + /* Super-IO chipset detection, Winbond, SMSC */ static void __devinit show_parconfig_smsc37c669(int io, int key) { - int cr1,cr4,cra,cr23,cr26,cr27,i=0; - static const char *const modes[]={ + int cr1, cr4, cra, cr23, cr26, cr27; + struct superio_struct *s; + + static const char *const modes[] = { "SPP and Bidirectional (PS/2)", "EPP and SPP", "ECP", "ECP and EPP" }; - outb(key,io); - outb(key,io); - outb(1,io); - cr1=inb(io+1); - outb(4,io); - cr4=inb(io+1); - outb(0x0a,io); - cra=inb(io+1); - outb(0x23,io); - cr23=inb(io+1); - outb(0x26,io); - cr26=inb(io+1); - outb(0x27,io); - cr27=inb(io+1); - outb(0xaa,io); + outb(key, io); + outb(key, io); + outb(1, io); + cr1 = inb(io + 1); + outb(4, io); + cr4 = inb(io + 1); + outb(0x0a, io); + cra = inb(io + 1); + outb(0x23, io); + cr23 = inb(io + 1); + outb(0x26, io); + cr26 = inb(io + 1); + outb(0x27, io); + cr27 = inb(io + 1); + outb(0xaa, io); if (verbose_probing) { - printk (KERN_INFO "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, " + printk(KERN_INFO + "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, " "A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n", - cr1,cr4,cra,cr23,cr26,cr27); - + cr1, cr4, cra, cr23, cr26, cr27); + /* The documentation calls DMA and IRQ-Lines by letters, so the board maker can/will wire them appropriately/randomly... G=reserved H=IDE-irq, */ - printk (KERN_INFO "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, " - "fifo threshold=%d\n", cr23*4, - (cr27 &0x0f) ? 'A'-1+(cr27 &0x0f): '-', - (cr26 &0x0f) ? 'A'-1+(cr26 &0x0f): '-', cra & 0x0f); + printk(KERN_INFO + "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, fifo threshold=%d\n", + cr23 * 4, + (cr27 & 0x0f) ? 'A' - 1 + (cr27 & 0x0f) : '-', + (cr26 & 0x0f) ? 'A' - 1 + (cr26 & 0x0f) : '-', + cra & 0x0f); printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n", - (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no"); - printk(KERN_INFO "SMSC LPT Config: Port mode=%s, EPP version =%s\n", - (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], - (cr4 & 0x40) ? "1.7" : "1.9"); + (cr23 * 4 >= 0x100) ? "yes" : "no", + (cr1 & 4) ? "yes" : "no"); + printk(KERN_INFO + "SMSC LPT Config: Port mode=%s, EPP version =%s\n", + (cr1 & 0x08) ? "Standard mode only (SPP)" + : modes[cr4 & 0x03], + (cr4 & 0x40) ? "1.7" : "1.9"); } - + /* Heuristics ! BIOS setup for this mainboard device limits the choices to standard settings, i.e. io-address and IRQ are related, however DMA can be 1 or 3, assume DMA_A=DMA1, DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */ - if(cr23*4 >=0x100) { /* if active */ - while((superios[i].io!= 0) && (i<NR_SUPERIOS)) - i++; - if(i==NR_SUPERIOS) + if (cr23 * 4 >= 0x100) { /* if active */ + s = find_free_superio(); + if (s == NULL) printk(KERN_INFO "Super-IO: too many chips!\n"); else { int d; - switch (cr23*4) { - case 0x3bc: - superios[i].io = 0x3bc; - superios[i].irq = 7; - break; - case 0x378: - superios[i].io = 0x378; - superios[i].irq = 7; - break; - case 0x278: - superios[i].io = 0x278; - superios[i].irq = 5; + switch (cr23 * 4) { + case 0x3bc: + s->io = 0x3bc; + s->irq = 7; + break; + case 0x378: + s->io = 0x378; + s->irq = 7; + break; + case 0x278: + s->io = 0x278; + s->irq = 5; } - d=(cr26 &0x0f); - if((d==1) || (d==3)) - superios[i].dma= d; + d = (cr26 & 0x0f); + if (d == 1 || d == 3) + s->dma = d; else - superios[i].dma= PARPORT_DMA_NONE; + s->dma = PARPORT_DMA_NONE; } - } + } } static void __devinit show_parconfig_winbond(int io, int key) { - int cr30,cr60,cr61,cr70,cr74,crf0,i=0; + int cr30, cr60, cr61, cr70, cr74, crf0; + struct superio_struct *s; static const char *const modes[] = { "Standard (SPP) and Bidirectional(PS/2)", /* 0 */ "EPP-1.9 and SPP", @@ -1296,110 +1330,134 @@ static void __devinit show_parconfig_winbond(int io, int key) static char *const irqtypes[] = { "pulsed low, high-Z", "follows nACK" }; - + /* The registers are called compatible-PnP because the - register layout is modelled after ISA-PnP, the access - method is just another ... */ - outb(key,io); - outb(key,io); - outb(0x07,io); /* Register 7: Select Logical Device */ - outb(0x01,io+1); /* LD1 is Parallel Port */ - outb(0x30,io); - cr30=inb(io+1); - outb(0x60,io); - cr60=inb(io+1); - outb(0x61,io); - cr61=inb(io+1); - outb(0x70,io); - cr70=inb(io+1); - outb(0x74,io); - cr74=inb(io+1); - outb(0xf0,io); - crf0=inb(io+1); - outb(0xaa,io); + register layout is modelled after ISA-PnP, the access + method is just another ... */ + outb(key, io); + outb(key, io); + outb(0x07, io); /* Register 7: Select Logical Device */ + outb(0x01, io + 1); /* LD1 is Parallel Port */ + outb(0x30, io); + cr30 = inb(io + 1); + outb(0x60, io); + cr60 = inb(io + 1); + outb(0x61, io); + cr61 = inb(io + 1); + outb(0x70, io); + cr70 = inb(io + 1); + outb(0x74, io); + cr74 = inb(io + 1); + outb(0xf0, io); + crf0 = inb(io + 1); + outb(0xaa, io); if (verbose_probing) { - printk(KERN_INFO "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x " - "70=%02x 74=%02x, f0=%02x\n", cr30,cr60,cr61,cr70,cr74,crf0); - printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", - (cr30 & 0x01) ? "yes":"no", cr60,cr61,cr70&0x0f ); + printk(KERN_INFO + "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x 70=%02x 74=%02x, f0=%02x\n", + cr30, cr60, cr61, cr70, cr74, crf0); + printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", + (cr30 & 0x01) ? "yes" : "no", cr60, cr61, cr70 & 0x0f); if ((cr74 & 0x07) > 3) printk("dma=none\n"); else - printk("dma=%d\n",cr74 & 0x07); - printk(KERN_INFO "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n", - irqtypes[crf0>>7], (crf0>>3)&0x0f); - printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]); + printk("dma=%d\n", cr74 & 0x07); + printk(KERN_INFO + "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n", + irqtypes[crf0>>7], (crf0>>3)&0x0f); + printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", + modes[crf0 & 0x07]); } - if(cr30 & 0x01) { /* the settings can be interrogated later ... */ - while((superios[i].io!= 0) && (i<NR_SUPERIOS)) - i++; - if(i==NR_SUPERIOS) + if (cr30 & 0x01) { /* the settings can be interrogated later ... */ + s = find_free_superio(); + if (s == NULL) printk(KERN_INFO "Super-IO: too many chips!\n"); else { - superios[i].io = (cr60<<8)|cr61; - superios[i].irq = cr70&0x0f; - superios[i].dma = (((cr74 & 0x07) > 3) ? + s->io = (cr60 << 8) | cr61; + s->irq = cr70 & 0x0f; + s->dma = (((cr74 & 0x07) > 3) ? PARPORT_DMA_NONE : (cr74 & 0x07)); } } } -static void __devinit decode_winbond(int efer, int key, int devid, int devrev, int oldid) +static void __devinit decode_winbond(int efer, int key, int devid, + int devrev, int oldid) { const char *type = "unknown"; - int id,progif=2; + int id, progif = 2; if (devid == devrev) /* simple heuristics, we happened to read some - non-winbond register */ + non-winbond register */ return; - id=(devid<<8) | devrev; + id = (devid << 8) | devrev; /* Values are from public data sheets pdf files, I can just - confirm 83977TF is correct :-) */ - if (id == 0x9771) type="83977F/AF"; - else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x"; - else if (id == 0x9774) type="83977ATF"; - else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x"; - else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x"; - else if ((id & ~0x0f) == 0x5210) type="83627"; - else if ((id & ~0x0f) == 0x6010) type="83697HF"; - else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;} - else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;} - else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;} - else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;} - else progif=0; + confirm 83977TF is correct :-) */ + if (id == 0x9771) + type = "83977F/AF"; + else if (id == 0x9773) + type = "83977TF / SMSC 97w33x/97w34x"; + else if (id == 0x9774) + type = "83977ATF"; + else if ((id & ~0x0f) == 0x5270) + type = "83977CTF / SMSC 97w36x"; + else if ((id & ~0x0f) == 0x52f0) + type = "83977EF / SMSC 97w35x"; + else if ((id & ~0x0f) == 0x5210) + type = "83627"; + else if ((id & ~0x0f) == 0x6010) + type = "83697HF"; + else if ((oldid & 0x0f) == 0x0a) { + type = "83877F"; + progif = 1; + } else if ((oldid & 0x0f) == 0x0b) { + type = "83877AF"; + progif = 1; + } else if ((oldid & 0x0f) == 0x0c) { + type = "83877TF"; + progif = 1; + } else if ((oldid & 0x0f) == 0x0d) { + type = "83877ATF"; + progif = 1; + } else + progif = 0; if (verbose_probing) printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x " - "devid=%02x devrev=%02x oldid=%02x type=%s\n", + "devid=%02x devrev=%02x oldid=%02x type=%s\n", efer, key, devid, devrev, oldid, type); if (progif == 2) - show_parconfig_winbond(efer,key); + show_parconfig_winbond(efer, key); } static void __devinit decode_smsc(int efer, int key, int devid, int devrev) { - const char *type = "unknown"; + const char *type = "unknown"; void (*func)(int io, int key); - int id; + int id; - if (devid == devrev) + if (devid == devrev) /* simple heuristics, we happened to read some - non-smsc register */ + non-smsc register */ return; - func=NULL; - id=(devid<<8) | devrev; + func = NULL; + id = (devid << 8) | devrev; - if (id==0x0302) {type="37c669"; func=show_parconfig_smsc37c669;} - else if (id==0x6582) type="37c665IR"; - else if (devid==0x65) type="37c665GT"; - else if (devid==0x66) type="37c666GT"; + if (id == 0x0302) { + type = "37c669"; + func = show_parconfig_smsc37c669; + } else if (id == 0x6582) + type = "37c665IR"; + else if (devid == 0x65) + type = "37c665GT"; + else if (devid == 0x66) + type = "37c666GT"; if (verbose_probing) printk(KERN_INFO "SMSC chip at EFER=0x%x " @@ -1407,138 +1465,138 @@ static void __devinit decode_smsc(int efer, int key, int devid, int devrev) efer, key, devid, devrev, type); if (func) - func(efer,key); + func(efer, key); } static void __devinit winbond_check(int io, int key) { - int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + int devid, devrev, oldid, x_devid, x_devrev, x_oldid; if (!request_region(io, 3, __func__)) return; /* First probe without key */ - outb(0x20,io); - x_devid=inb(io+1); - outb(0x21,io); - x_devrev=inb(io+1); - outb(0x09,io); - x_oldid=inb(io+1); - - outb(key,io); - outb(key,io); /* Write Magic Sequence to EFER, extended - funtion enable register */ - outb(0x20,io); /* Write EFIR, extended function index register */ - devid=inb(io+1); /* Read EFDR, extended function data register */ - outb(0x21,io); - devrev=inb(io+1); - outb(0x09,io); - oldid=inb(io+1); - outb(0xaa,io); /* Magic Seal */ + outb(0x20, io); + x_devid = inb(io + 1); + outb(0x21, io); + x_devrev = inb(io + 1); + outb(0x09, io); + x_oldid = inb(io + 1); + + outb(key, io); + outb(key, io); /* Write Magic Sequence to EFER, extended + funtion enable register */ + outb(0x20, io); /* Write EFIR, extended function index register */ + devid = inb(io + 1); /* Read EFDR, extended function data register */ + outb(0x21, io); + devrev = inb(io + 1); + outb(0x09, io); + oldid = inb(io + 1); + outb(0xaa, io); /* Magic Seal */ if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) goto out; /* protection against false positives */ - decode_winbond(io,key,devid,devrev,oldid); + decode_winbond(io, key, devid, devrev, oldid); out: release_region(io, 3); } -static void __devinit winbond_check2(int io,int key) +static void __devinit winbond_check2(int io, int key) { - int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + int devid, devrev, oldid, x_devid, x_devrev, x_oldid; if (!request_region(io, 3, __func__)) return; /* First probe without the key */ - outb(0x20,io+2); - x_devid=inb(io+2); - outb(0x21,io+1); - x_devrev=inb(io+2); - outb(0x09,io+1); - x_oldid=inb(io+2); - - outb(key,io); /* Write Magic Byte to EFER, extended - funtion enable register */ - outb(0x20,io+2); /* Write EFIR, extended function index register */ - devid=inb(io+2); /* Read EFDR, extended function data register */ - outb(0x21,io+1); - devrev=inb(io+2); - outb(0x09,io+1); - oldid=inb(io+2); - outb(0xaa,io); /* Magic Seal */ - - if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) + outb(0x20, io + 2); + x_devid = inb(io + 2); + outb(0x21, io + 1); + x_devrev = inb(io + 2); + outb(0x09, io + 1); + x_oldid = inb(io + 2); + + outb(key, io); /* Write Magic Byte to EFER, extended + funtion enable register */ + outb(0x20, io + 2); /* Write EFIR, extended function index register */ + devid = inb(io + 2); /* Read EFDR, extended function data register */ + outb(0x21, io + 1); + devrev = inb(io + 2); + outb(0x09, io + 1); + oldid = inb(io + 2); + outb(0xaa, io); /* Magic Seal */ + + if (x_devid == devid && x_devrev == devrev && x_oldid == oldid) goto out; /* protection against false positives */ - decode_winbond(io,key,devid,devrev,oldid); + decode_winbond(io, key, devid, devrev, oldid); out: release_region(io, 3); } static void __devinit smsc_check(int io, int key) { - int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev; + int id, rev, oldid, oldrev, x_id, x_rev, x_oldid, x_oldrev; if (!request_region(io, 3, __func__)) return; /* First probe without the key */ - outb(0x0d,io); - x_oldid=inb(io+1); - outb(0x0e,io); - x_oldrev=inb(io+1); - outb(0x20,io); - x_id=inb(io+1); - outb(0x21,io); - x_rev=inb(io+1); - - outb(key,io); - outb(key,io); /* Write Magic Sequence to EFER, extended - funtion enable register */ - outb(0x0d,io); /* Write EFIR, extended function index register */ - oldid=inb(io+1); /* Read EFDR, extended function data register */ - outb(0x0e,io); - oldrev=inb(io+1); - outb(0x20,io); - id=inb(io+1); - outb(0x21,io); - rev=inb(io+1); - outb(0xaa,io); /* Magic Seal */ - - if ((x_id == id) && (x_oldrev == oldrev) && - (x_oldid == oldid) && (x_rev == rev)) + outb(0x0d, io); + x_oldid = inb(io + 1); + outb(0x0e, io); + x_oldrev = inb(io + 1); + outb(0x20, io); + x_id = inb(io + 1); + outb(0x21, io); + x_rev = inb(io + 1); + + outb(key, io); + outb(key, io); /* Write Magic Sequence to EFER, extended + funtion enable register */ + outb(0x0d, io); /* Write EFIR, extended function index register */ + oldid = inb(io + 1); /* Read EFDR, extended function data register */ + outb(0x0e, io); + oldrev = inb(io + 1); + outb(0x20, io); + id = inb(io + 1); + outb(0x21, io); + rev = inb(io + 1); + outb(0xaa, io); /* Magic Seal */ + + if (x_id == id && x_oldrev == oldrev && + x_oldid == oldid && x_rev == rev) goto out; /* protection against false positives */ - decode_smsc(io,key,oldid,oldrev); + decode_smsc(io, key, oldid, oldrev); out: release_region(io, 3); } -static void __devinit detect_and_report_winbond (void) -{ +static void __devinit detect_and_report_winbond(void) +{ if (verbose_probing) printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n"); - winbond_check(0x3f0,0x87); - winbond_check(0x370,0x87); - winbond_check(0x2e ,0x87); - winbond_check(0x4e ,0x87); - winbond_check(0x3f0,0x86); - winbond_check2(0x250,0x88); - winbond_check2(0x250,0x89); + winbond_check(0x3f0, 0x87); + winbond_check(0x370, 0x87); + winbond_check(0x2e , 0x87); + winbond_check(0x4e , 0x87); + winbond_check(0x3f0, 0x86); + winbond_check2(0x250, 0x88); + winbond_check2(0x250, 0x89); } -static void __devinit detect_and_report_smsc (void) +static void __devinit detect_and_report_smsc(void) { if (verbose_probing) printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n"); - smsc_check(0x3f0,0x55); - smsc_check(0x370,0x55); - smsc_check(0x3f0,0x44); - smsc_check(0x370,0x44); + smsc_check(0x3f0, 0x55); + smsc_check(0x370, 0x55); + smsc_check(0x3f0, 0x44); + smsc_check(0x370, 0x44); } static void __devinit detect_and_report_it87(void) @@ -1573,34 +1631,39 @@ static void __devinit detect_and_report_it87(void) } #endif /* CONFIG_PARPORT_PC_SUPERIO */ -static int get_superio_dma (struct parport *p) +static struct superio_struct *find_superio(struct parport *p) { - int i=0; - while( (superios[i].io != p->base) && (i<NR_SUPERIOS)) - i++; - if (i!=NR_SUPERIOS) - return superios[i].dma; + int i; + for (i = 0; i < NR_SUPERIOS; i++) + if (superios[i].io != p->base) + return &superios[i]; + return NULL; +} + +static int get_superio_dma(struct parport *p) +{ + struct superio_struct *s = find_superio(p); + if (s) + return s->dma; return PARPORT_DMA_NONE; } -static int get_superio_irq (struct parport *p) +static int get_superio_irq(struct parport *p) { - int i=0; - while( (superios[i].io != p->base) && (i<NR_SUPERIOS)) - i++; - if (i!=NR_SUPERIOS) - return superios[i].irq; - return PARPORT_IRQ_NONE; + struct superio_struct *s = find_superio(p); + if (s) + return s->irq; + return PARPORT_IRQ_NONE; } - + /* --- Mode detection ------------------------------------- */ /* * Checks for port existence, all ports support SPP MODE - * Returns: + * Returns: * 0 : No parallel port at this address - * PARPORT_MODE_PCSPP : SPP port detected + * PARPORT_MODE_PCSPP : SPP port detected * (if the user specified an ioport himself, * this shall always be the case!) * @@ -1610,7 +1673,7 @@ static int parport_SPP_supported(struct parport *pb) unsigned char r, w; /* - * first clear an eventually pending EPP timeout + * first clear an eventually pending EPP timeout * I (sailer@ife.ee.ethz.ch) have an SMSC chipset * that does not even respond to SPP cycles if an EPP * timeout is pending @@ -1619,19 +1682,19 @@ static int parport_SPP_supported(struct parport *pb) /* Do a simple read-write test to make sure the port exists. */ w = 0xc; - outb (w, CONTROL (pb)); + outb(w, CONTROL(pb)); /* Is there a control register that we can read from? Some * ports don't allow reads, so read_control just returns a * software copy. Some ports _do_ allow reads, so bypass the * software copy here. In addition, some bits aren't * writable. */ - r = inb (CONTROL (pb)); + r = inb(CONTROL(pb)); if ((r & 0xf) == w) { w = 0xe; - outb (w, CONTROL (pb)); - r = inb (CONTROL (pb)); - outb (0xc, CONTROL (pb)); + outb(w, CONTROL(pb)); + r = inb(CONTROL(pb)); + outb(0xc, CONTROL(pb)); if ((r & 0xf) == w) return PARPORT_MODE_PCSPP; } @@ -1639,18 +1702,18 @@ static int parport_SPP_supported(struct parport *pb) if (user_specified) /* That didn't work, but the user thinks there's a * port here. */ - printk (KERN_INFO "parport 0x%lx (WARNING): CTR: " + printk(KERN_INFO "parport 0x%lx (WARNING): CTR: " "wrote 0x%02x, read 0x%02x\n", pb->base, w, r); /* Try the data register. The data lines aren't tri-stated at * this stage, so we expect back what we wrote. */ w = 0xaa; - parport_pc_write_data (pb, w); - r = parport_pc_read_data (pb); + parport_pc_write_data(pb, w); + r = parport_pc_read_data(pb); if (r == w) { w = 0x55; - parport_pc_write_data (pb, w); - r = parport_pc_read_data (pb); + parport_pc_write_data(pb, w); + r = parport_pc_read_data(pb); if (r == w) return PARPORT_MODE_PCSPP; } @@ -1658,9 +1721,9 @@ static int parport_SPP_supported(struct parport *pb) if (user_specified) { /* Didn't work, but the user is convinced this is the * place. */ - printk (KERN_INFO "parport 0x%lx (WARNING): DATA: " + printk(KERN_INFO "parport 0x%lx (WARNING): DATA: " "wrote 0x%02x, read 0x%02x\n", pb->base, w, r); - printk (KERN_INFO "parport 0x%lx: You gave this address, " + printk(KERN_INFO "parport 0x%lx: You gave this address, " "but there is probably no parallel port there!\n", pb->base); } @@ -1691,33 +1754,33 @@ static int parport_ECR_present(struct parport *pb) struct parport_pc_private *priv = pb->private_data; unsigned char r = 0xc; - outb (r, CONTROL (pb)); - if ((inb (ECONTROL (pb)) & 0x3) == (r & 0x3)) { - outb (r ^ 0x2, CONTROL (pb)); /* Toggle bit 1 */ + outb(r, CONTROL(pb)); + if ((inb(ECONTROL(pb)) & 0x3) == (r & 0x3)) { + outb(r ^ 0x2, CONTROL(pb)); /* Toggle bit 1 */ - r = inb (CONTROL (pb)); - if ((inb (ECONTROL (pb)) & 0x2) == (r & 0x2)) + r = inb(CONTROL(pb)); + if ((inb(ECONTROL(pb)) & 0x2) == (r & 0x2)) goto no_reg; /* Sure that no ECR register exists */ } - - if ((inb (ECONTROL (pb)) & 0x3 ) != 0x1) + + if ((inb(ECONTROL(pb)) & 0x3) != 0x1) goto no_reg; - ECR_WRITE (pb, 0x34); - if (inb (ECONTROL (pb)) != 0x35) + ECR_WRITE(pb, 0x34); + if (inb(ECONTROL(pb)) != 0x35) goto no_reg; priv->ecr = 1; - outb (0xc, CONTROL (pb)); - + outb(0xc, CONTROL(pb)); + /* Go to mode 000 */ - frob_set_mode (pb, ECR_SPP); + frob_set_mode(pb, ECR_SPP); return 1; no_reg: - outb (0xc, CONTROL (pb)); - return 0; + outb(0xc, CONTROL(pb)); + return 0; } #ifdef CONFIG_PARPORT_1284 @@ -1727,7 +1790,7 @@ static int parport_ECR_present(struct parport *pb) * allows us to read data from the data lines. In theory we would get back * 0xff but any peripheral attached to the port may drag some or all of the * lines down to zero. So if we get back anything that isn't the contents - * of the data register we deem PS/2 support to be present. + * of the data register we deem PS/2 support to be present. * * Some SPP ports have "half PS/2" ability - you can't turn off the line * drivers, but an external peripheral with sufficiently beefy drivers of @@ -1735,26 +1798,28 @@ static int parport_ECR_present(struct parport *pb) * where they can then be read back as normal. Ports with this property * and the right type of device attached are likely to fail the SPP test, * (as they will appear to have stuck bits) and so the fact that they might - * be misdetected here is rather academic. + * be misdetected here is rather academic. */ static int parport_PS2_supported(struct parport *pb) { int ok = 0; - + clear_epp_timeout(pb); /* try to tri-state the buffer */ - parport_pc_data_reverse (pb); - + parport_pc_data_reverse(pb); + parport_pc_write_data(pb, 0x55); - if (parport_pc_read_data(pb) != 0x55) ok++; + if (parport_pc_read_data(pb) != 0x55) + ok++; parport_pc_write_data(pb, 0xaa); - if (parport_pc_read_data(pb) != 0xaa) ok++; + if (parport_pc_read_data(pb) != 0xaa) + ok++; /* cancel input mode */ - parport_pc_data_forward (pb); + parport_pc_data_forward(pb); if (ok) { pb->modes |= PARPORT_MODE_TRISTATE; @@ -1773,68 +1838,68 @@ static int parport_ECP_supported(struct parport *pb) int config, configb; int pword; struct parport_pc_private *priv = pb->private_data; - /* Translate ECP intrLine to ISA irq value */ - static const int intrline[]= { 0, 7, 9, 10, 11, 14, 15, 5 }; + /* Translate ECP intrLine to ISA irq value */ + static const int intrline[] = { 0, 7, 9, 10, 11, 14, 15, 5 }; /* If there is no ECR, we have no hope of supporting ECP. */ if (!priv->ecr) return 0; /* Find out FIFO depth */ - ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ - ECR_WRITE (pb, ECR_TST << 5); /* TEST FIFO */ - for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02); i++) - outb (0xaa, FIFO (pb)); + ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */ + ECR_WRITE(pb, ECR_TST << 5); /* TEST FIFO */ + for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02); i++) + outb(0xaa, FIFO(pb)); /* * Using LGS chipset it uses ECR register, but * it doesn't support ECP or FIFO MODE */ if (i == 1024) { - ECR_WRITE (pb, ECR_SPP << 5); + ECR_WRITE(pb, ECR_SPP << 5); return 0; } priv->fifo_depth = i; if (verbose_probing) - printk (KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i); + printk(KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i); /* Find out writeIntrThreshold */ - frob_econtrol (pb, 1<<2, 1<<2); - frob_econtrol (pb, 1<<2, 0); + frob_econtrol(pb, 1<<2, 1<<2); + frob_econtrol(pb, 1<<2, 0); for (i = 1; i <= priv->fifo_depth; i++) { - inb (FIFO (pb)); - udelay (50); - if (inb (ECONTROL (pb)) & (1<<2)) + inb(FIFO(pb)); + udelay(50); + if (inb(ECONTROL(pb)) & (1<<2)) break; } if (i <= priv->fifo_depth) { if (verbose_probing) - printk (KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n", + printk(KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n", pb->base, i); } else /* Number of bytes we know we can write if we get an - interrupt. */ + interrupt. */ i = 0; priv->writeIntrThreshold = i; /* Find out readIntrThreshold */ - frob_set_mode (pb, ECR_PS2); /* Reset FIFO and enable PS2 */ - parport_pc_data_reverse (pb); /* Must be in PS2 mode */ - frob_set_mode (pb, ECR_TST); /* Test FIFO */ - frob_econtrol (pb, 1<<2, 1<<2); - frob_econtrol (pb, 1<<2, 0); + frob_set_mode(pb, ECR_PS2); /* Reset FIFO and enable PS2 */ + parport_pc_data_reverse(pb); /* Must be in PS2 mode */ + frob_set_mode(pb, ECR_TST); /* Test FIFO */ + frob_econtrol(pb, 1<<2, 1<<2); + frob_econtrol(pb, 1<<2, 0); for (i = 1; i <= priv->fifo_depth; i++) { - outb (0xaa, FIFO (pb)); - if (inb (ECONTROL (pb)) & (1<<2)) + outb(0xaa, FIFO(pb)); + if (inb(ECONTROL(pb)) & (1<<2)) break; } if (i <= priv->fifo_depth) { if (verbose_probing) - printk (KERN_INFO "0x%lx: readIntrThreshold is %d\n", + printk(KERN_INFO "0x%lx: readIntrThreshold is %d\n", pb->base, i); } else /* Number of bytes we can read if we get an interrupt. */ @@ -1842,23 +1907,23 @@ static int parport_ECP_supported(struct parport *pb) priv->readIntrThreshold = i; - ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ - ECR_WRITE (pb, 0xf4); /* Configuration mode */ - config = inb (CONFIGA (pb)); + ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */ + ECR_WRITE(pb, 0xf4); /* Configuration mode */ + config = inb(CONFIGA(pb)); pword = (config >> 4) & 0x7; switch (pword) { case 0: pword = 2; - printk (KERN_WARNING "0x%lx: Unsupported pword size!\n", + printk(KERN_WARNING "0x%lx: Unsupported pword size!\n", pb->base); break; case 2: pword = 4; - printk (KERN_WARNING "0x%lx: Unsupported pword size!\n", + printk(KERN_WARNING "0x%lx: Unsupported pword size!\n", pb->base); break; default: - printk (KERN_WARNING "0x%lx: Unknown implementation ID\n", + printk(KERN_WARNING "0x%lx: Unknown implementation ID\n", pb->base); /* Assume 1 */ case 1: @@ -1867,28 +1932,29 @@ static int parport_ECP_supported(struct parport *pb) priv->pword = pword; if (verbose_probing) { - printk (KERN_DEBUG "0x%lx: PWord is %d bits\n", pb->base, 8 * pword); - - printk (KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base, + printk(KERN_DEBUG "0x%lx: PWord is %d bits\n", + pb->base, 8 * pword); + + printk(KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base, config & 0x80 ? "Level" : "Pulses"); - configb = inb (CONFIGB (pb)); - printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n", + configb = inb(CONFIGB(pb)); + printk(KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n", pb->base, config, configb); - printk (KERN_DEBUG "0x%lx: ECP settings irq=", pb->base); - if ((configb >>3) & 0x07) - printk("%d",intrline[(configb >>3) & 0x07]); + printk(KERN_DEBUG "0x%lx: ECP settings irq=", pb->base); + if ((configb >> 3) & 0x07) + printk("%d", intrline[(configb >> 3) & 0x07]); else printk("<none or set by other means>"); - printk (" dma="); - if( (configb & 0x03 ) == 0x00) + printk(" dma="); + if ((configb & 0x03) == 0x00) printk("<none or set by other means>\n"); else - printk("%d\n",configb & 0x07); + printk("%d\n", configb & 0x07); } /* Go back to mode 000 */ - frob_set_mode (pb, ECR_SPP); + frob_set_mode(pb, ECR_SPP); return 1; } @@ -1903,10 +1969,10 @@ static int parport_ECPPS2_supported(struct parport *pb) if (!priv->ecr) return 0; - oecr = inb (ECONTROL (pb)); - ECR_WRITE (pb, ECR_PS2 << 5); + oecr = inb(ECONTROL(pb)); + ECR_WRITE(pb, ECR_PS2 << 5); result = parport_PS2_supported(pb); - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); return result; } @@ -1930,16 +1996,15 @@ static int parport_EPP_supported(struct parport *pb) */ /* If EPP timeout bit clear then EPP available */ - if (!clear_epp_timeout(pb)) { + if (!clear_epp_timeout(pb)) return 0; /* No way to clear timeout */ - } /* Check for Intel bug. */ if (priv->ecr) { unsigned char i; for (i = 0x00; i < 0x80; i += 0x20) { - ECR_WRITE (pb, i); - if (clear_epp_timeout (pb)) { + ECR_WRITE(pb, i); + if (clear_epp_timeout(pb)) { /* Phony EPP in ECP. */ return 0; } @@ -1963,17 +2028,16 @@ static int parport_ECPEPP_supported(struct parport *pb) int result; unsigned char oecr; - if (!priv->ecr) { + if (!priv->ecr) return 0; - } - oecr = inb (ECONTROL (pb)); + oecr = inb(ECONTROL(pb)); /* Search for SMC style EPP+ECP mode */ - ECR_WRITE (pb, 0x80); - outb (0x04, CONTROL (pb)); + ECR_WRITE(pb, 0x80); + outb(0x04, CONTROL(pb)); result = parport_EPP_supported(pb); - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); if (result) { /* Set up access functions to use ECP+EPP hardware. */ @@ -1991,11 +2055,25 @@ static int parport_ECPEPP_supported(struct parport *pb) /* Don't bother probing for modes we know we won't use. */ static int __devinit parport_PS2_supported(struct parport *pb) { return 0; } #ifdef CONFIG_PARPORT_PC_FIFO -static int parport_ECP_supported(struct parport *pb) { return 0; } +static int parport_ECP_supported(struct parport *pb) +{ + return 0; +} #endif -static int __devinit parport_EPP_supported(struct parport *pb) { return 0; } -static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;} -static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;} +static int __devinit parport_EPP_supported(struct parport *pb) +{ + return 0; +} + +static int __devinit parport_ECPEPP_supported(struct parport *pb) +{ + return 0; +} + +static int __devinit parport_ECPPS2_supported(struct parport *pb) +{ + return 0; +} #endif /* No IEEE 1284 support */ @@ -2005,17 +2083,17 @@ static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;} static int programmable_irq_support(struct parport *pb) { int irq, intrLine; - unsigned char oecr = inb (ECONTROL (pb)); + unsigned char oecr = inb(ECONTROL(pb)); static const int lookup[8] = { PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5 }; - ECR_WRITE (pb, ECR_CNF << 5); /* Configuration MODE */ + ECR_WRITE(pb, ECR_CNF << 5); /* Configuration MODE */ - intrLine = (inb (CONFIGB (pb)) >> 3) & 0x07; + intrLine = (inb(CONFIGB(pb)) >> 3) & 0x07; irq = lookup[intrLine]; - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); return irq; } @@ -2025,17 +2103,17 @@ static int irq_probe_ECP(struct parport *pb) unsigned long irqs; irqs = probe_irq_on(); - - ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ - ECR_WRITE (pb, (ECR_TST << 5) | 0x04); - ECR_WRITE (pb, ECR_TST << 5); + + ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */ + ECR_WRITE(pb, (ECR_TST << 5) | 0x04); + ECR_WRITE(pb, ECR_TST << 5); /* If Full FIFO sure that writeIntrThreshold is generated */ - for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02) ; i++) - outb (0xaa, FIFO (pb)); - + for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02) ; i++) + outb(0xaa, FIFO(pb)); + pb->irq = probe_irq_off(irqs); - ECR_WRITE (pb, ECR_SPP << 5); + ECR_WRITE(pb, ECR_SPP << 5); if (pb->irq <= 0) pb->irq = PARPORT_IRQ_NONE; @@ -2045,7 +2123,7 @@ static int irq_probe_ECP(struct parport *pb) /* * This detection seems that only works in National Semiconductors - * This doesn't work in SMC, LGS, and Winbond + * This doesn't work in SMC, LGS, and Winbond */ static int irq_probe_EPP(struct parport *pb) { @@ -2056,16 +2134,16 @@ static int irq_probe_EPP(struct parport *pb) unsigned char oecr; if (pb->modes & PARPORT_MODE_PCECR) - oecr = inb (ECONTROL (pb)); + oecr = inb(ECONTROL(pb)); irqs = probe_irq_on(); if (pb->modes & PARPORT_MODE_PCECR) - frob_econtrol (pb, 0x10, 0x10); - + frob_econtrol(pb, 0x10, 0x10); + clear_epp_timeout(pb); - parport_pc_frob_control (pb, 0x20, 0x20); - parport_pc_frob_control (pb, 0x10, 0x10); + parport_pc_frob_control(pb, 0x20, 0x20); + parport_pc_frob_control(pb, 0x10, 0x10); clear_epp_timeout(pb); /* Device isn't expecting an EPP read @@ -2074,9 +2152,9 @@ static int irq_probe_EPP(struct parport *pb) parport_pc_read_epp(pb); udelay(20); - pb->irq = probe_irq_off (irqs); + pb->irq = probe_irq_off(irqs); if (pb->modes & PARPORT_MODE_PCECR) - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); parport_pc_write_control(pb, 0xc); if (pb->irq <= 0) @@ -2133,28 +2211,28 @@ static int parport_irq_probe(struct parport *pb) /* --- DMA detection -------------------------------------- */ /* Only if chipset conforms to ECP ISA Interface Standard */ -static int programmable_dma_support (struct parport *p) +static int programmable_dma_support(struct parport *p) { - unsigned char oecr = inb (ECONTROL (p)); + unsigned char oecr = inb(ECONTROL(p)); int dma; - frob_set_mode (p, ECR_CNF); - - dma = inb (CONFIGB(p)) & 0x07; + frob_set_mode(p, ECR_CNF); + + dma = inb(CONFIGB(p)) & 0x07; /* 000: Indicates jumpered 8-bit DMA if read-only. 100: Indicates jumpered 16-bit DMA if read-only. */ if ((dma & 0x03) == 0) dma = PARPORT_DMA_NONE; - ECR_WRITE (p, oecr); + ECR_WRITE(p, oecr); return dma; } -static int parport_dma_probe (struct parport *p) +static int parport_dma_probe(struct parport *p) { const struct parport_pc_private *priv = p->private_data; - if (priv->ecr) - p->dma = programmable_dma_support(p); /* ask ECP chipset first */ + if (priv->ecr) /* ask ECP chipset first */ + p->dma = programmable_dma_support(p); if (p->dma == PARPORT_DMA_NONE) { /* ask known Super-IO chips proper, although these claim ECP compatible, some don't report their DMA @@ -2212,7 +2290,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (!base_res) goto out4; - memcpy(ops, &parport_pc_ops, sizeof (struct parport_operations)); + memcpy(ops, &parport_pc_ops, sizeof(struct parport_operations)); priv->ctr = 0xc; priv->ctr_writable = ~0x10; priv->ecr = 0; @@ -2239,7 +2317,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (!parport_EPP_supported(p)) parport_ECPEPP_supported(p); } - if (!parport_SPP_supported (p)) + if (!parport_SPP_supported(p)) /* No port. */ goto out5; if (priv->ecr) @@ -2247,7 +2325,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, else parport_PS2_supported(p); - p->size = (p->modes & PARPORT_MODE_EPP)?8:3; + p->size = (p->modes & PARPORT_MODE_EPP) ? 8 : 3; printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base); if (p->base_hi && priv->ecr) @@ -2271,7 +2349,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, } } if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq - is mandatory (see above) */ + is mandatory (see above) */ p->dma = PARPORT_DMA_NONE; #ifdef CONFIG_PARPORT_PC_FIFO @@ -2288,16 +2366,23 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (p->dma != PARPORT_DMA_NONE) { printk(", dma %d", p->dma); p->modes |= PARPORT_MODE_DMA; - } - else printk(", using FIFO"); - } - else + } else + printk(", using FIFO"); + } else /* We can't use the DMA channel after all. */ p->dma = PARPORT_DMA_NONE; #endif /* Allowed to use FIFO/DMA */ printk(" ["); -#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}} + +#define printmode(x) \ + {\ + if (p->modes & PARPORT_MODE_##x) {\ + printk("%s%s", f ? "," : "", #x);\ + f++;\ + } \ + } + { int f = 0; printmode(PCSPP); @@ -2309,10 +2394,10 @@ struct parport *parport_pc_probe_port(unsigned long int base, } #undef printmode #ifndef CONFIG_PARPORT_1284 - printk ("(,...)"); + printk("(,...)"); #endif /* CONFIG_PARPORT_1284 */ printk("]\n"); - if (probedirq != PARPORT_IRQ_NONE) + if (probedirq != PARPORT_IRQ_NONE) printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq); /* If No ECP release the ports grabbed above. */ @@ -2328,7 +2413,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (p->irq != PARPORT_IRQ_NONE) { if (request_irq(p->irq, parport_irq_handler, irqflags, p->name, p)) { - printk (KERN_WARNING "%s: irq %d in use, " + printk(KERN_WARNING "%s: irq %d in use, " "resorting to polled operation\n", p->name, p->irq); p->irq = PARPORT_IRQ_NONE; @@ -2338,8 +2423,8 @@ struct parport *parport_pc_probe_port(unsigned long int base, #ifdef CONFIG_PARPORT_PC_FIFO #ifdef HAS_DMA if (p->dma != PARPORT_DMA_NONE) { - if (request_dma (p->dma, p->name)) { - printk (KERN_WARNING "%s: dma %d in use, " + if (request_dma(p->dma, p->name)) { + printk(KERN_WARNING "%s: dma %d in use, " "resorting to PIO operation\n", p->name, p->dma); p->dma = PARPORT_DMA_NONE; @@ -2349,8 +2434,8 @@ struct parport *parport_pc_probe_port(unsigned long int base, PAGE_SIZE, &priv->dma_handle, GFP_KERNEL); - if (! priv->dma_buf) { - printk (KERN_WARNING "%s: " + if (!priv->dma_buf) { + printk(KERN_WARNING "%s: " "cannot get buffer for DMA, " "resorting to PIO operation\n", p->name); @@ -2369,10 +2454,10 @@ struct parport *parport_pc_probe_port(unsigned long int base, * Put the ECP detected port in PS2 mode. * Do this also for ports that have ECR but don't do ECP. */ - ECR_WRITE (p, 0x34); + ECR_WRITE(p, 0x34); parport_pc_write_data(p, 0); - parport_pc_data_forward (p); + parport_pc_data_forward(p); /* Now that we've told the sharing engine about the port, and found out its characteristics, let the high-level drivers @@ -2380,7 +2465,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, spin_lock(&ports_lock); list_add(&priv->list, &ports_list); spin_unlock(&ports_lock); - parport_announce_port (p); + parport_announce_port(p); return p; @@ -2393,18 +2478,17 @@ out5: out4: parport_put_port(p); out3: - kfree (priv); + kfree(priv); out2: - kfree (ops); + kfree(ops); out1: if (pdev) platform_device_unregister(pdev); return NULL; } +EXPORT_SYMBOL(parport_pc_probe_port); -EXPORT_SYMBOL (parport_pc_probe_port); - -void parport_pc_unregister_port (struct parport *p) +void parport_pc_unregister_port(struct parport *p) { struct parport_pc_private *priv = p->private_data; struct parport_operations *ops = p->ops; @@ -2430,17 +2514,16 @@ void parport_pc_unregister_port (struct parport *p) priv->dma_buf, priv->dma_handle); #endif - kfree (p->private_data); + kfree(p->private_data); parport_put_port(p); - kfree (ops); /* hope no-one cached it */ + kfree(ops); /* hope no-one cached it */ } - -EXPORT_SYMBOL (parport_pc_unregister_port); +EXPORT_SYMBOL(parport_pc_unregister_port); #ifdef CONFIG_PCI /* ITE support maintained by Rich Liu <richliu@poorman.org> */ -static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, +static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq, int autodma, const struct parport_pc_via_data *via) { @@ -2452,73 +2535,74 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, int irq; int i; - DPRINTK (KERN_DEBUG "sio_ite_8872_probe()\n"); - - // make sure which one chip - for(i = 0; i < 5; i++) { + DPRINTK(KERN_DEBUG "sio_ite_8872_probe()\n"); + + /* make sure which one chip */ + for (i = 0; i < 5; i++) { base_res = request_region(inta_addr[i], 32, "it887x"); if (base_res) { int test; - pci_write_config_dword (pdev, 0x60, + pci_write_config_dword(pdev, 0x60, 0xe5000000 | inta_addr[i]); - pci_write_config_dword (pdev, 0x78, + pci_write_config_dword(pdev, 0x78, 0x00000000 | inta_addr[i]); - test = inb (inta_addr[i]); - if (test != 0xff) break; + test = inb(inta_addr[i]); + if (test != 0xff) + break; release_region(inta_addr[i], 0x8); } } - if(i >= 5) { - printk (KERN_INFO "parport_pc: cannot find ITE8872 INTA\n"); + if (i >= 5) { + printk(KERN_INFO "parport_pc: cannot find ITE8872 INTA\n"); return 0; } - type = inb (inta_addr[i] + 0x18); + type = inb(inta_addr[i] + 0x18); type &= 0x0f; switch (type) { case 0x2: - printk (KERN_INFO "parport_pc: ITE8871 found (1P)\n"); + printk(KERN_INFO "parport_pc: ITE8871 found (1P)\n"); ite8872set = 0x64200000; break; case 0xa: - printk (KERN_INFO "parport_pc: ITE8875 found (1P)\n"); + printk(KERN_INFO "parport_pc: ITE8875 found (1P)\n"); ite8872set = 0x64200000; break; case 0xe: - printk (KERN_INFO "parport_pc: ITE8872 found (2S1P)\n"); + printk(KERN_INFO "parport_pc: ITE8872 found (2S1P)\n"); ite8872set = 0x64e00000; break; case 0x6: - printk (KERN_INFO "parport_pc: ITE8873 found (1S)\n"); + printk(KERN_INFO "parport_pc: ITE8873 found (1S)\n"); return 0; case 0x8: - DPRINTK (KERN_DEBUG "parport_pc: ITE8874 found (2S)\n"); + DPRINTK(KERN_DEBUG "parport_pc: ITE8874 found (2S)\n"); return 0; default: - printk (KERN_INFO "parport_pc: unknown ITE887x\n"); - printk (KERN_INFO "parport_pc: please mail 'lspci -nvv' " + printk(KERN_INFO "parport_pc: unknown ITE887x\n"); + printk(KERN_INFO "parport_pc: please mail 'lspci -nvv' " "output to Rich.Liu@ite.com.tw\n"); return 0; } - pci_read_config_byte (pdev, 0x3c, &ite8872_irq); - pci_read_config_dword (pdev, 0x1c, &ite8872_lpt); + pci_read_config_byte(pdev, 0x3c, &ite8872_irq); + pci_read_config_dword(pdev, 0x1c, &ite8872_lpt); ite8872_lpt &= 0x0000ff00; - pci_read_config_dword (pdev, 0x20, &ite8872_lpthi); + pci_read_config_dword(pdev, 0x20, &ite8872_lpthi); ite8872_lpthi &= 0x0000ff00; - pci_write_config_dword (pdev, 0x6c, 0xe3000000 | ite8872_lpt); - pci_write_config_dword (pdev, 0x70, 0xe3000000 | ite8872_lpthi); - pci_write_config_dword (pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt); - // SET SPP&EPP , Parallel Port NO DMA , Enable All Function - // SET Parallel IRQ - pci_write_config_dword (pdev, 0x9c, + pci_write_config_dword(pdev, 0x6c, 0xe3000000 | ite8872_lpt); + pci_write_config_dword(pdev, 0x70, 0xe3000000 | ite8872_lpthi); + pci_write_config_dword(pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt); + /* SET SPP&EPP , Parallel Port NO DMA , Enable All Function */ + /* SET Parallel IRQ */ + pci_write_config_dword(pdev, 0x9c, ite8872set | (ite8872_irq * 0x11111)); - DPRINTK (KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq); - DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n", + DPRINTK(KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq); + DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n", ite8872_lpt); - DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n", + DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n", ite8872_lpthi); /* Let the user (or defaults) steer us away from interrupts */ @@ -2530,14 +2614,14 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, * Release the resource so that parport_pc_probe_port can get it. */ release_resource(base_res); - if (parport_pc_probe_port (ite8872_lpt, ite8872_lpthi, + if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi, irq, PARPORT_DMA_NONE, &pdev->dev, 0)) { - printk (KERN_INFO + printk(KERN_INFO "parport_pc: ITE 8872 parallel port: io=0x%X", - ite8872_lpt); + ite8872_lpt); if (irq != PARPORT_IRQ_NONE) - printk (", irq=%d", irq); - printk ("\n"); + printk(", irq=%d", irq); + printk("\n"); return 1; } @@ -2546,7 +2630,7 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, /* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru> based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */ -static int __devinitdata parport_init_mode = 0; +static int __devinitdata parport_init_mode; /* Data for two known VIA chips */ static struct parport_pc_via_data via_686a_data __devinitdata = { @@ -2568,7 +2652,7 @@ static struct parport_pc_via_data via_8231_data __devinitdata = { 0xF6 }; -static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, +static int __devinit sio_via_probe(struct pci_dev *pdev, int autoirq, int autodma, const struct parport_pc_via_data *via) { @@ -2580,38 +2664,38 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n"); - switch(parport_init_mode) - { + switch (parport_init_mode) { case 1: - printk(KERN_DEBUG "parport_pc: setting SPP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_SPP; - break; + printk(KERN_DEBUG "parport_pc: setting SPP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_SPP; + break; case 2: - printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n"); - siofunc = VIA_FUNCTION_PARPORT_SPP; - ppcontrol = VIA_PARPORT_BIDIR; - break; + printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n"); + siofunc = VIA_FUNCTION_PARPORT_SPP; + ppcontrol = VIA_PARPORT_BIDIR; + break; case 3: - printk(KERN_DEBUG "parport_pc: setting EPP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_EPP; - ppcontrol = VIA_PARPORT_BIDIR; - have_epp = 1; - break; + printk(KERN_DEBUG "parport_pc: setting EPP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_EPP; + ppcontrol = VIA_PARPORT_BIDIR; + have_epp = 1; + break; case 4: - printk(KERN_DEBUG "parport_pc: setting ECP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_ECP; - ppcontrol = VIA_PARPORT_BIDIR; - break; + printk(KERN_DEBUG "parport_pc: setting ECP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_ECP; + ppcontrol = VIA_PARPORT_BIDIR; + break; case 5: - printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_ECP; - ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP; - have_epp = 1; - break; - default: - printk(KERN_DEBUG "parport_pc: probing current configuration\n"); - siofunc = VIA_FUNCTION_PROBE; - break; + printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_ECP; + ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP; + have_epp = 1; + break; + default: + printk(KERN_DEBUG + "parport_pc: probing current configuration\n"); + siofunc = VIA_FUNCTION_PROBE; + break; } /* * unlock super i/o configuration @@ -2622,38 +2706,36 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, /* Bits 1-0: Parallel Port Mode / Enable */ outb(via->viacfg_function, VIA_CONFIG_INDEX); - tmp = inb (VIA_CONFIG_DATA); + tmp = inb(VIA_CONFIG_DATA); /* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */ outb(via->viacfg_parport_control, VIA_CONFIG_INDEX); - tmp2 = inb (VIA_CONFIG_DATA); - if (siofunc == VIA_FUNCTION_PROBE) - { - siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE; - ppcontrol = tmp2; + tmp2 = inb(VIA_CONFIG_DATA); + if (siofunc == VIA_FUNCTION_PROBE) { + siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE; + ppcontrol = tmp2; + } else { + tmp &= ~VIA_FUNCTION_PARPORT_DISABLE; + tmp |= siofunc; + outb(via->viacfg_function, VIA_CONFIG_INDEX); + outb(tmp, VIA_CONFIG_DATA); + tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP); + tmp2 |= ppcontrol; + outb(via->viacfg_parport_control, VIA_CONFIG_INDEX); + outb(tmp2, VIA_CONFIG_DATA); } - else - { - tmp &= ~VIA_FUNCTION_PARPORT_DISABLE; - tmp |= siofunc; - outb(via->viacfg_function, VIA_CONFIG_INDEX); - outb(tmp, VIA_CONFIG_DATA); - tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP); - tmp2 |= ppcontrol; - outb(via->viacfg_parport_control, VIA_CONFIG_INDEX); - outb(tmp2, VIA_CONFIG_DATA); - } - + /* Parallel Port I/O Base Address, bits 9-2 */ outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); port1 = inb(VIA_CONFIG_DATA) << 2; - - printk (KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",port1); - if ((port1 == 0x3BC) && have_epp) - { - outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); - outb((0x378 >> 2), VIA_CONFIG_DATA); - printk(KERN_DEBUG "parport_pc: Parallel port base changed to 0x378\n"); - port1 = 0x378; + + printk(KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n", + port1); + if (port1 == 0x3BC && have_epp) { + outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); + outb((0x378 >> 2), VIA_CONFIG_DATA); + printk(KERN_DEBUG + "parport_pc: Parallel port base changed to 0x378\n"); + port1 = 0x378; } /* @@ -2667,36 +2749,39 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n"); return 0; } - + /* Bits 7-4: PnP Routing for Parallel Port IRQ */ pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp); irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4); - if (siofunc == VIA_FUNCTION_PARPORT_ECP) - { - /* Bits 3-2: PnP Routing for Parallel Port DMA */ - pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp); - dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2); - } - else - /* if ECP not enabled, DMA is not enabled, assumed bogus 'dma' value */ - dma = PARPORT_DMA_NONE; + if (siofunc == VIA_FUNCTION_PARPORT_ECP) { + /* Bits 3-2: PnP Routing for Parallel Port DMA */ + pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp); + dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2); + } else + /* if ECP not enabled, DMA is not enabled, assumed + bogus 'dma' value */ + dma = PARPORT_DMA_NONE; /* Let the user (or defaults) steer us away from interrupts and DMA */ if (autoirq == PARPORT_IRQ_NONE) { - irq = PARPORT_IRQ_NONE; - dma = PARPORT_DMA_NONE; + irq = PARPORT_IRQ_NONE; + dma = PARPORT_DMA_NONE; } if (autodma == PARPORT_DMA_NONE) - dma = PARPORT_DMA_NONE; + dma = PARPORT_DMA_NONE; switch (port1) { - case 0x3bc: port2 = 0x7bc; break; - case 0x378: port2 = 0x778; break; - case 0x278: port2 = 0x678; break; + case 0x3bc: + port2 = 0x7bc; break; + case 0x378: + port2 = 0x778; break; + case 0x278: + port2 = 0x678; break; default: - printk(KERN_INFO "parport_pc: Weird VIA parport base 0x%X, ignoring\n", - port1); + printk(KERN_INFO + "parport_pc: Weird VIA parport base 0x%X, ignoring\n", + port1); return 0; } @@ -2714,17 +2799,17 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, } /* finally, do the probe with values obtained */ - if (parport_pc_probe_port (port1, port2, irq, dma, &pdev->dev, 0)) { - printk (KERN_INFO + if (parport_pc_probe_port(port1, port2, irq, dma, &pdev->dev, 0)) { + printk(KERN_INFO "parport_pc: VIA parallel port: io=0x%X", port1); if (irq != PARPORT_IRQ_NONE) - printk (", irq=%d", irq); + printk(", irq=%d", irq); if (dma != PARPORT_DMA_NONE) - printk (", dma=%d", dma); - printk ("\n"); + printk(", dma=%d", dma); + printk("\n"); return 1; } - + printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n", port1, irq, dma); return 0; @@ -2732,8 +2817,8 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, enum parport_pc_sio_types { - sio_via_686a = 0, /* Via VT82C686A motherboard Super I/O */ - sio_via_8231, /* Via VT8231 south bridge integrated Super IO */ + sio_via_686a = 0, /* Via VT82C686A motherboard Super I/O */ + sio_via_8231, /* Via VT8231 south bridge integrated Super IO */ sio_ite_8872, last_sio }; @@ -2804,15 +2889,15 @@ enum parport_pc_pci_cards { }; -/* each element directly indexed from enum list, above +/* each element directly indexed from enum list, above * (but offset by last_sio) */ static struct parport_pc_pci { int numports; struct { /* BAR (base address registers) numbers in the config - space header */ + space header */ int lo; - int hi; /* -1 if not there, >6 for offset-method (max - BAR is 6) */ + int hi; + /* -1 if not there, >6 for offset-method (max BAR is 6) */ } addr[4]; /* If set, this is called immediately after pci_enable_device. @@ -2857,7 +2942,7 @@ static struct parport_pc_pci { /* timedia_4018 */ { 2, { { 0, 1 }, { 2, 3 }, } }, /* timedia_9018a */ { 2, { { 0, 1 }, { 2, 3 }, } }, /* SYBA uses fixed offsets in - a 1K io window */ + a 1K io window */ /* syba_2p_epp AP138B */ { 2, { { 0, 0x078 }, { 0, 0x178 }, } }, /* syba_1p_ecp W83787 */ { 1, { { 0, 0x078 }, } }, /* titan_010l */ { 1, { { 3, -1 }, } }, @@ -2873,11 +2958,14 @@ static struct parport_pc_pci { /* oxsemi_pcie_pport */ { 1, { { 0, 1 }, } }, /* aks_0100 */ { 1, { { 0, -1 }, } }, /* mobility_pp */ { 1, { { 0, 1 }, } }, - /* netmos_9705 */ { 1, { { 0, -1 }, } }, /* untested */ - /* netmos_9715 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */ - /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */ - /* netmos_9805 */ { 1, { { 0, -1 }, } }, /* untested */ - /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */ + + /* The netmos entries below are untested */ + /* netmos_9705 */ { 1, { { 0, -1 }, } }, + /* netmos_9715 */ { 2, { { 0, 1 }, { 2, 3 },} }, + /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, + /* netmos_9805 */ { 1, { { 0, -1 }, } }, + /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, + /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, }; @@ -2906,7 +2994,7 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR, PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar }, { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, - PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0,0, plx_9050 }, + PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0, 0, plx_9050 }, /* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/ { 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a }, { 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h }, @@ -2940,7 +3028,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { { 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 }, { 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 }, /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/ - { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */ + /* AFAVLAB_TK9902 */ + { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, { 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p}, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP, PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 }, @@ -2983,14 +3072,14 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, { 0, } /* terminate list */ }; -MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl); +MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl); struct pci_parport_data { int num; struct parport *ports[2]; }; -static int parport_pc_pci_probe (struct pci_dev *dev, +static int parport_pc_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { int err, count, n, i = id->driver_data; @@ -3003,7 +3092,8 @@ static int parport_pc_pci_probe (struct pci_dev *dev, /* This is a PCI card */ i -= last_sio; count = 0; - if ((err = pci_enable_device (dev)) != 0) + err = pci_enable_device(dev); + if (err) return err; data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL); @@ -3011,7 +3101,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev, return -ENOMEM; if (cards[i].preinit_hook && - cards[i].preinit_hook (dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) { + cards[i].preinit_hook(dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) { kfree(data); return -ENODEV; } @@ -3021,25 +3111,25 @@ static int parport_pc_pci_probe (struct pci_dev *dev, int hi = cards[i].addr[n].hi; int irq; unsigned long io_lo, io_hi; - io_lo = pci_resource_start (dev, lo); + io_lo = pci_resource_start(dev, lo); io_hi = 0; if ((hi >= 0) && (hi <= 6)) - io_hi = pci_resource_start (dev, hi); + io_hi = pci_resource_start(dev, hi); else if (hi > 6) io_lo += hi; /* Reinterpret the meaning of - "hi" as an offset (see SYBA - def.) */ + "hi" as an offset (see SYBA + def.) */ /* TODO: test if sharing interrupts works */ irq = dev->irq; if (irq == IRQ_NONE) { - printk (KERN_DEBUG + printk(KERN_DEBUG "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n", parport_pc_pci_tbl[i + last_sio].vendor, parport_pc_pci_tbl[i + last_sio].device, io_lo, io_hi); irq = PARPORT_IRQ_NONE; } else { - printk (KERN_DEBUG + printk(KERN_DEBUG "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n", parport_pc_pci_tbl[i + last_sio].vendor, parport_pc_pci_tbl[i + last_sio].device, @@ -3056,7 +3146,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev, data->num = count; if (cards[i].postinit_hook) - cards[i].postinit_hook (dev, count == 0); + cards[i].postinit_hook(dev, count == 0); if (count) { pci_set_drvdata(dev, data); @@ -3090,7 +3180,7 @@ static struct pci_driver parport_pc_pci_driver = { .remove = __devexit_p(parport_pc_pci_remove), }; -static int __init parport_pc_init_superio (int autoirq, int autodma) +static int __init parport_pc_init_superio(int autoirq, int autodma) { const struct pci_device_id *id; struct pci_dev *pdev = NULL; @@ -3101,8 +3191,9 @@ static int __init parport_pc_init_superio (int autoirq, int autodma) if (id == NULL || id->driver_data >= last_sio) continue; - if (parport_pc_superio_info[id->driver_data].probe - (pdev, autoirq, autodma,parport_pc_superio_info[id->driver_data].via)) { + if (parport_pc_superio_info[id->driver_data].probe( + pdev, autoirq, autodma, + parport_pc_superio_info[id->driver_data].via)) { ret++; } } @@ -3111,7 +3202,10 @@ static int __init parport_pc_init_superio (int autoirq, int autodma) } #else static struct pci_driver parport_pc_pci_driver; -static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;} +static int __init parport_pc_init_superio(int autoirq, int autodma) +{ + return 0; +} #endif /* CONFIG_PCI */ #ifdef CONFIG_PNP @@ -3124,44 +3218,45 @@ static const struct pnp_device_id parport_pc_pnp_tbl[] = { { } }; -MODULE_DEVICE_TABLE(pnp,parport_pc_pnp_tbl); +MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl); -static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id) +static int parport_pc_pnp_probe(struct pnp_dev *dev, + const struct pnp_device_id *id) { struct parport *pdata; unsigned long io_lo, io_hi; int dma, irq; - if (pnp_port_valid(dev,0) && - !(pnp_port_flags(dev,0) & IORESOURCE_DISABLED)) { - io_lo = pnp_port_start(dev,0); + if (pnp_port_valid(dev, 0) && + !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) { + io_lo = pnp_port_start(dev, 0); } else return -EINVAL; - if (pnp_port_valid(dev,1) && - !(pnp_port_flags(dev,1) & IORESOURCE_DISABLED)) { - io_hi = pnp_port_start(dev,1); + if (pnp_port_valid(dev, 1) && + !(pnp_port_flags(dev, 1) & IORESOURCE_DISABLED)) { + io_hi = pnp_port_start(dev, 1); } else io_hi = 0; - if (pnp_irq_valid(dev,0) && - !(pnp_irq_flags(dev,0) & IORESOURCE_DISABLED)) { - irq = pnp_irq(dev,0); + if (pnp_irq_valid(dev, 0) && + !(pnp_irq_flags(dev, 0) & IORESOURCE_DISABLED)) { + irq = pnp_irq(dev, 0); } else irq = PARPORT_IRQ_NONE; - if (pnp_dma_valid(dev,0) && - !(pnp_dma_flags(dev,0) & IORESOURCE_DISABLED)) { - dma = pnp_dma(dev,0); + if (pnp_dma_valid(dev, 0) && + !(pnp_dma_flags(dev, 0) & IORESOURCE_DISABLED)) { + dma = pnp_dma(dev, 0); } else dma = PARPORT_DMA_NONE; dev_info(&dev->dev, "reported by %s\n", dev->protocol->name); - if (!(pdata = parport_pc_probe_port(io_lo, io_hi, - irq, dma, &dev->dev, 0))) + pdata = parport_pc_probe_port(io_lo, io_hi, irq, dma, &dev->dev, 0); + if (pdata == NULL) return -ENODEV; - pnp_set_drvdata(dev,pdata); + pnp_set_drvdata(dev, pdata); return 0; } @@ -3203,7 +3298,7 @@ static struct platform_driver parport_pc_platform_driver = { /* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */ static int __devinit __attribute__((unused)) -parport_pc_find_isa_ports (int autoirq, int autodma) +parport_pc_find_isa_ports(int autoirq, int autodma) { int count = 0; @@ -3227,7 +3322,7 @@ parport_pc_find_isa_ports (int autoirq, int autodma) * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO */ -static void __init parport_pc_find_ports (int autoirq, int autodma) +static void __init parport_pc_find_ports(int autoirq, int autodma) { int count = 0, err; @@ -3261,11 +3356,18 @@ static void __init parport_pc_find_ports (int autoirq, int autodma) * syntax and keep in mind that code below is a cleaned up version. */ -static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { [0 ... PARPORT_PC_MAX_PORTS] = 0 }; -static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = - { [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO }; -static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE }; -static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY }; +static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { + [0 ... PARPORT_PC_MAX_PORTS] = 0 +}; +static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = { + [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO +}; +static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { + [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE +}; +static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { + [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY +}; static int __init parport_parse_param(const char *s, int *val, int automatic, int none, int nofifo) @@ -3306,18 +3408,19 @@ static int __init parport_parse_dma(const char *dmastr, int *val) #ifdef CONFIG_PCI static int __init parport_init_mode_setup(char *str) { - printk(KERN_DEBUG "parport_pc.c: Specified parameter parport_init_mode=%s\n", str); - - if (!strcmp (str, "spp")) - parport_init_mode=1; - if (!strcmp (str, "ps2")) - parport_init_mode=2; - if (!strcmp (str, "epp")) - parport_init_mode=3; - if (!strcmp (str, "ecp")) - parport_init_mode=4; - if (!strcmp (str, "ecpepp")) - parport_init_mode=5; + printk(KERN_DEBUG + "parport_pc.c: Specified parameter parport_init_mode=%s\n", str); + + if (!strcmp(str, "spp")) + parport_init_mode = 1; + if (!strcmp(str, "ps2")) + parport_init_mode = 2; + if (!strcmp(str, "epp")) + parport_init_mode = 3; + if (!strcmp(str, "ecp")) + parport_init_mode = 4; + if (!strcmp(str, "ecpepp")) + parport_init_mode = 5; return 1; } #endif @@ -3341,7 +3444,8 @@ module_param(verbose_probing, int, 0644); #endif #ifdef CONFIG_PCI static char *init_mode; -MODULE_PARM_DESC(init_mode, "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)"); +MODULE_PARM_DESC(init_mode, + "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)"); module_param(init_mode, charp, 0); #endif @@ -3372,7 +3476,7 @@ static int __init parse_parport_params(void) irqval[0] = val; break; default: - printk (KERN_WARNING + printk(KERN_WARNING "parport_pc: irq specified " "without base address. Use 'io=' " "to specify one\n"); @@ -3385,7 +3489,7 @@ static int __init parse_parport_params(void) dmaval[0] = val; break; default: - printk (KERN_WARNING + printk(KERN_WARNING "parport_pc: dma specified " "without base address. Use 'io=' " "to specify one\n"); @@ -3396,7 +3500,7 @@ static int __init parse_parport_params(void) #else -static int parport_setup_ptr __initdata = 0; +static int parport_setup_ptr __initdata; /* * Acceptable parameters: @@ -3407,7 +3511,7 @@ static int parport_setup_ptr __initdata = 0; * * IRQ/DMA may be numeric or 'auto' or 'none' */ -static int __init parport_setup (char *str) +static int __init parport_setup(char *str) { char *endptr; char *sep; @@ -3419,15 +3523,15 @@ static int __init parport_setup (char *str) return 1; } - if (!strncmp (str, "auto", 4)) { + if (!strncmp(str, "auto", 4)) { irqval[0] = PARPORT_IRQ_AUTO; dmaval[0] = PARPORT_DMA_AUTO; return 1; } - val = simple_strtoul (str, &endptr, 0); + val = simple_strtoul(str, &endptr, 0); if (endptr == str) { - printk (KERN_WARNING "parport=%s not understood\n", str); + printk(KERN_WARNING "parport=%s not understood\n", str); return 1; } @@ -3461,7 +3565,7 @@ static int __init parse_parport_params(void) return io[0] == PARPORT_DISABLE; } -__setup ("parport=", parport_setup); +__setup("parport=", parport_setup); /* * Acceptable parameters: @@ -3469,7 +3573,7 @@ __setup ("parport=", parport_setup); * parport_init_mode=[spp|ps2|epp|ecp|ecpepp] */ #ifdef CONFIG_PCI -__setup("parport_init_mode=",parport_init_mode_setup); +__setup("parport_init_mode=", parport_init_mode_setup); #endif #endif @@ -3493,13 +3597,13 @@ static int __init parport_pc_init(void) for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) { if (!io[i]) break; - if ((io_hi[i]) == PARPORT_IOHI_AUTO) - io_hi[i] = 0x400 + io[i]; + if (io_hi[i] == PARPORT_IOHI_AUTO) + io_hi[i] = 0x400 + io[i]; parport_pc_probe_port(io[i], io_hi[i], - irqval[i], dmaval[i], NULL, 0); + irqval[i], dmaval[i], NULL, 0); } } else - parport_pc_find_ports (irqval[0], dmaval[0]); + parport_pc_find_ports(irqval[0], dmaval[0]); return 0; } @@ -3507,9 +3611,9 @@ static int __init parport_pc_init(void) static void __exit parport_pc_exit(void) { if (pci_registered_parport) - pci_unregister_driver (&parport_pc_pci_driver); + pci_unregister_driver(&parport_pc_pci_driver); if (pnp_registered_parport) - pnp_unregister_driver (&parport_pc_pnp_driver); + pnp_unregister_driver(&parport_pc_pnp_driver); platform_driver_unregister(&parport_pc_platform_driver); while (!list_empty(&ports_list)) { diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index e1716f14cd47..91e316fe6522 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp, return blk_trace_setup(sdp->device->request_queue, sdp->disk->disk_name, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), + NULL, (char *)arg); case BLKTRACESTART: return blk_trace_startstop(sdp->device->request_queue, 1); diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index a0127e93ade0..fb867a9f55e9 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -287,6 +287,13 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, + [PORT_AR7] = { + .name = "AR7", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, }; #if defined (CONFIG_SERIAL_8250_AU1X00) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 938bc1b6c3fa..e371a9c15341 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -2776,6 +2776,9 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_OXSEMI, 0x950a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_2_1130000 }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950, + PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0, + pbn_b0_1_921600 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_4_115200 }, diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 343e3a35b6a3..641e800ed693 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -833,6 +833,7 @@ config SERIAL_IMX bool "IMX serial port support" depends on ARM && (ARCH_IMX || ARCH_MXC) select SERIAL_CORE + select RATIONAL help If you have a machine based on a Motorola IMX CPU you can enable its onboard serial port by enabling this option. @@ -1433,4 +1434,11 @@ config SPORT_BAUD_RATE default 19200 if (SERIAL_SPORT_BAUD_RATE_19200) default 9600 if (SERIAL_SPORT_BAUD_RATE_9600) +config SERIAL_TIMBERDALE + tristate "Support for timberdale UART" + depends on MFD_TIMBERDALE + select SERIAL_CORE + ---help--- + Add support for UART controller on timberdale. + endmenu diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index d438eb2a73de..45a8658f54d5 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -77,3 +77,4 @@ obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o obj-$(CONFIG_SERIAL_QE) += ucc_uart.o +obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c index d86123e03391..e2f6b1bfac98 100644 --- a/drivers/serial/bfin_5xx.c +++ b/drivers/serial/bfin_5xx.c @@ -330,6 +330,11 @@ static void bfin_serial_tx_chars(struct bfin_serial_port *uart) /* Clear TFI bit */ UART_PUT_LSR(uart, TFI); #endif + /* Anomaly notes: + * 05000215 - we always clear ETBEI within last UART TX + * interrupt to end a string. It is always set + * when start a new tx. + */ UART_CLEAR_IER(uart, ETBEI); return; } @@ -415,6 +420,7 @@ static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart) set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail)); set_dma_x_count(uart->tx_dma_channel, uart->tx_count); set_dma_x_modify(uart->tx_dma_channel, 1); + SSYNC(); enable_dma(uart->tx_dma_channel); UART_SET_IER(uart, ETBEI); @@ -473,27 +479,41 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart) void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart) { int x_pos, pos; - unsigned long flags; - - spin_lock_irqsave(&uart->port.lock, flags); + dma_disable_irq(uart->rx_dma_channel); + spin_lock_bh(&uart->port.lock); + + /* 2D DMA RX buffer ring is used. Because curr_y_count and + * curr_x_count can't be read as an atomic operation, + * curr_y_count should be read before curr_x_count. When + * curr_x_count is read, curr_y_count may already indicate + * next buffer line. But, the position calculated here is + * still indicate the old line. The wrong position data may + * be smaller than current buffer tail, which cause garbages + * are received if it is not prohibit. + */ uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel); x_pos = get_dma_curr_xcount(uart->rx_dma_channel); uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows; - if (uart->rx_dma_nrows == DMA_RX_YCOUNT) + if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0) uart->rx_dma_nrows = 0; x_pos = DMA_RX_XCOUNT - x_pos; if (x_pos == DMA_RX_XCOUNT) x_pos = 0; pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos; - if (pos != uart->rx_dma_buf.tail) { + /* Ignore receiving data if new position is in the same line of + * current buffer tail and small. + */ + if (pos > uart->rx_dma_buf.tail || + uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) { uart->rx_dma_buf.head = pos; bfin_serial_dma_rx_chars(uart); uart->rx_dma_buf.tail = uart->rx_dma_buf.head; } - spin_unlock_irqrestore(&uart->port.lock, flags); + spin_unlock_bh(&uart->port.lock); + dma_enable_irq(uart->rx_dma_channel); mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES); } @@ -514,6 +534,11 @@ static irqreturn_t bfin_serial_dma_tx_int(int irq, void *dev_id) if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) { disable_dma(uart->tx_dma_channel); clear_dma_irqstat(uart->tx_dma_channel); + /* Anomaly notes: + * 05000215 - we always clear ETBEI within last UART TX + * interrupt to end a string. It is always set + * when start a new tx. + */ UART_CLEAR_IER(uart, ETBEI); xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1); uart->port.icount.tx += uart->tx_count; @@ -532,11 +557,26 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id) { struct bfin_serial_port *uart = dev_id; unsigned short irqstat; + int x_pos, pos; spin_lock(&uart->port.lock); irqstat = get_dma_curr_irqstat(uart->rx_dma_channel); clear_dma_irqstat(uart->rx_dma_channel); - bfin_serial_dma_rx_chars(uart); + + uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel); + x_pos = get_dma_curr_xcount(uart->rx_dma_channel); + uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows; + if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0) + uart->rx_dma_nrows = 0; + + pos = uart->rx_dma_nrows * DMA_RX_XCOUNT; + if (pos > uart->rx_dma_buf.tail || + uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) { + uart->rx_dma_buf.head = pos; + bfin_serial_dma_rx_chars(uart); + uart->rx_dma_buf.tail = uart->rx_dma_buf.head; + } + spin_unlock(&uart->port.lock); return IRQ_HANDLED; @@ -789,8 +829,16 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios, __func__); } - if (termios->c_cflag & CSTOPB) - lcr |= STB; + /* Anomaly notes: + * 05000231 - STOP bit is always set to 1 whatever the user is set. + */ + if (termios->c_cflag & CSTOPB) { + if (ANOMALY_05000231) + printk(KERN_WARNING "STOP bits other than 1 is not " + "supported in case of anomaly 05000231.\n"); + else + lcr |= STB; + } if (termios->c_cflag & PARENB) lcr |= PEN; if (!(termios->c_cflag & PARODD)) @@ -940,6 +988,10 @@ static void bfin_serial_reset_irda(struct uart_port *port) } #ifdef CONFIG_CONSOLE_POLL +/* Anomaly notes: + * 05000099 - Because we only use THRE in poll_put and DR in poll_get, + * losing other bits of UART_LSR is not a problem here. + */ static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr) { struct bfin_serial_port *uart = (struct bfin_serial_port *)port; @@ -1245,12 +1297,17 @@ static __init void early_serial_write(struct console *con, const char *s, } } +/* + * This should have a .setup or .early_setup in it, but then things get called + * without the command line options, and the baud rate gets messed up - so + * don't let the common infrastructure play with things. (see calls to setup + * & earlysetup in ./kernel/printk.c:register_console() + */ static struct __initdata console bfin_early_serial_console = { .name = "early_BFuart", .write = early_serial_write, .device = uart_console_device, .flags = CON_PRINTBUFFER, - .setup = bfin_serial_console_setup, .index = -1, .data = &bfin_serial_reg, }; diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c index 529c0ff7952c..34b4ae0fe760 100644 --- a/drivers/serial/bfin_sport_uart.c +++ b/drivers/serial/bfin_sport_uart.c @@ -101,15 +101,16 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value) { pr_debug("%s value:%x\n", __func__, value); /* Place a Start and Stop bit */ - __asm__ volatile ( - "R2 = b#01111111100;\n\t" - "R3 = b#10000000001;\n\t" - "%0 <<= 2;\n\t" - "%0 = %0 & R2;\n\t" - "%0 = %0 | R3;\n\t" - :"=r"(value) - :"0"(value) - :"R2", "R3"); + __asm__ __volatile__ ( + "R2 = b#01111111100;" + "R3 = b#10000000001;" + "%0 <<= 2;" + "%0 = %0 & R2;" + "%0 = %0 | R3;" + : "=d"(value) + : "d"(value) + : "ASTAT", "R2", "R3" + ); pr_debug("%s value:%x\n", __func__, value); SPORT_PUT_TX(up, value); @@ -118,27 +119,30 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value) static inline unsigned int rx_one_byte(struct sport_uart_port *up) { unsigned int value, extract; + u32 tmp_mask1, tmp_mask2, tmp_shift, tmp; value = SPORT_GET_RX32(up); pr_debug("%s value:%x\n", __func__, value); /* Extract 8 bits data */ - __asm__ volatile ( - "R5 = 0;\n\t" - "P0 = 8;\n\t" - "R1 = 0x1801(Z);\n\t" - "R3 = 0x0300(Z);\n\t" - "R4 = 0;\n\t" - "LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t" - "R2 = extract(%1, R1.L)(Z);\n\t" - "R2 <<= R4;\n\t" - "R5 = R5 | R2;\n\t" - "R1 = R1 - R3;\nloop_e:\t" - "R4 += 1;\n\t" - "%0 = R5;\n\t" - :"=r"(extract) - :"r"(value) - :"P0", "R1", "R2","R3","R4", "R5"); + __asm__ __volatile__ ( + "%[extr] = 0;" + "%[mask1] = 0x1801(Z);" + "%[mask2] = 0x0300(Z);" + "%[shift] = 0;" + "LSETUP(.Lloop_s, .Lloop_e) LC0 = %[lc];" + ".Lloop_s:" + "%[tmp] = extract(%[val], %[mask1].L)(Z);" + "%[tmp] <<= %[shift];" + "%[extr] = %[extr] | %[tmp];" + "%[mask1] = %[mask1] - %[mask2];" + ".Lloop_e:" + "%[shift] += 1;" + : [val]"=d"(value), [extr]"=d"(extract), [shift]"=d"(tmp_shift), [tmp]"=d"(tmp), + [mask1]"=d"(tmp_mask1), [mask2]"=d"(tmp_mask2) + : "d"(value), [lc]"a"(8) + : "ASTAT", "LB0", "LC0", "LT0" + ); pr_debug(" extract:%x\n", extract); return extract; @@ -149,7 +153,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate) int tclkdiv, tfsdiv, rclkdiv; /* Set TCR1 and TCR2 */ - SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK)); + SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK)); SPORT_PUT_TCR2(up, 10); pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up)); @@ -419,7 +423,7 @@ static void sport_shutdown(struct uart_port *port) } static void sport_set_termios(struct uart_port *port, - struct termios *termios, struct termios *old) + struct ktermios *termios, struct ktermios *old) { pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag); uart_update_timeout(port, CS8 ,port->uartclk); diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c index a461b3b2c72d..9f2891c2c4a2 100644 --- a/drivers/serial/icom.c +++ b/drivers/serial/icom.c @@ -137,7 +137,12 @@ static LIST_HEAD(icom_adapter_head); static spinlock_t icom_lock; #ifdef ICOM_TRACE -static inline void trace(struct icom_port *, char *, unsigned long) {}; +static inline void trace(struct icom_port *icom_port, char *trace_pt, + unsigned long trace_data) +{ + dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n", + icom_port->port, trace_pt, trace_data); +} #else static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {}; #endif @@ -408,7 +413,7 @@ static void load_code(struct icom_port *icom_port) release_firmware(fw); /* Set Hardware level */ - if ((icom_port->adapter->version | ADAPTER_V2) == ADAPTER_V2) + if (icom_port->adapter->version == ADAPTER_V2) writeb(V2_HARDWARE, &(icom_port->dram->misc_flags)); /* Start the processor in Adapter */ @@ -861,7 +866,7 @@ static irqreturn_t icom_interrupt(int irq, void *dev_id) /* find icom_port for this interrupt */ icom_adapter = (struct icom_adapter *) dev_id; - if ((icom_adapter->version | ADAPTER_V2) == ADAPTER_V2) { + if (icom_adapter->version == ADAPTER_V2) { int_reg = icom_adapter->base_addr + 0x8024; adapter_interrupts = readl(int_reg); @@ -1647,15 +1652,6 @@ static void __exit icom_exit(void) module_init(icom_init); module_exit(icom_exit); -#ifdef ICOM_TRACE -static inline void trace(struct icom_port *icom_port, char *trace_pt, - unsigned long trace_data) -{ - dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n", - icom_port->port, trace_pt, trace_data); -} -#endif - MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>"); MODULE_DESCRIPTION("IBM iSeries Serial IOA driver"); MODULE_SUPPORTED_DEVICE diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index 5f0be40dfdab..7b5d1de9cfe3 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -8,6 +8,9 @@ * Author: Sascha Hauer <sascha@saschahauer.de> * Copyright (C) 2004 Pengutronix * + * Copyright (C) 2009 emlix GmbH + * Author: Fabian Godehardt (added IrDA support for iMX) + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -41,6 +44,8 @@ #include <linux/serial_core.h> #include <linux/serial.h> #include <linux/clk.h> +#include <linux/delay.h> +#include <linux/rational.h> #include <asm/io.h> #include <asm/irq.h> @@ -148,6 +153,7 @@ #define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ #define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ #define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ +#define UFCR_RFDIV_REG(x) (((x) < 7 ? 6 - (x) : 6) << 7) #define UFCR_TXTL_SHF 10 /* Transmitter trigger level shift */ #define USR1_PARITYERR (1<<15) /* Parity error interrupt flag */ #define USR1_RTSS (1<<14) /* RTS pin status */ @@ -211,10 +217,20 @@ struct imx_port { struct timer_list timer; unsigned int old_status; int txirq,rxirq,rtsirq; - int have_rtscts:1; + unsigned int have_rtscts:1; + unsigned int use_irda:1; + unsigned int irda_inv_rx:1; + unsigned int irda_inv_tx:1; + unsigned short trcv_delay; /* transceiver delay */ struct clk *clk; }; +#ifdef CONFIG_IRDA +#define USE_IRDA(sport) ((sport)->use_irda) +#else +#define USE_IRDA(sport) (0) +#endif + /* * Handle any change of modem status signal since we were last called. */ @@ -268,6 +284,48 @@ static void imx_stop_tx(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long temp; + if (USE_IRDA(sport)) { + /* half duplex - wait for end of transmission */ + int n = 256; + while ((--n > 0) && + !(readl(sport->port.membase + USR2) & USR2_TXDC)) { + udelay(5); + barrier(); + } + /* + * irda transceiver - wait a bit more to avoid + * cutoff, hardware dependent + */ + udelay(sport->trcv_delay); + + /* + * half duplex - reactivate receive mode, + * flush receive pipe echo crap + */ + if (readl(sport->port.membase + USR2) & USR2_TXDC) { + temp = readl(sport->port.membase + UCR1); + temp &= ~(UCR1_TXMPTYEN | UCR1_TRDYEN); + writel(temp, sport->port.membase + UCR1); + + temp = readl(sport->port.membase + UCR4); + temp &= ~(UCR4_TCEN); + writel(temp, sport->port.membase + UCR4); + + while (readl(sport->port.membase + URXD0) & + URXD_CHARRDY) + barrier(); + + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_RRDYEN; + writel(temp, sport->port.membase + UCR1); + + temp = readl(sport->port.membase + UCR4); + temp |= UCR4_DREN; + writel(temp, sport->port.membase + UCR4); + } + return; + } + temp = readl(sport->port.membase + UCR1); writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1); } @@ -302,13 +360,15 @@ static inline void imx_transmit_buffer(struct imx_port *sport) /* send xmit->buf[xmit->tail] * out the port here */ writel(xmit->buf[xmit->tail], sport->port.membase + URTX0); - xmit->tail = (xmit->tail + 1) & - (UART_XMIT_SIZE - 1); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); sport->port.icount.tx++; if (uart_circ_empty(xmit)) break; } + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&sport->port); + if (uart_circ_empty(xmit)) imx_stop_tx(&sport->port); } @@ -321,9 +381,30 @@ static void imx_start_tx(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long temp; + if (USE_IRDA(sport)) { + /* half duplex in IrDA mode; have to disable receive mode */ + temp = readl(sport->port.membase + UCR4); + temp &= ~(UCR4_DREN); + writel(temp, sport->port.membase + UCR4); + + temp = readl(sport->port.membase + UCR1); + temp &= ~(UCR1_RRDYEN); + writel(temp, sport->port.membase + UCR1); + } + temp = readl(sport->port.membase + UCR1); writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1); + if (USE_IRDA(sport)) { + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_TRDYEN; + writel(temp, sport->port.membase + UCR1); + + temp = readl(sport->port.membase + UCR4); + temp |= UCR4_TCEN; + writel(temp, sport->port.membase + UCR4); + } + if (readl(sport->port.membase + UTS) & UTS_TXEMPTY) imx_transmit_buffer(sport); } @@ -395,8 +476,7 @@ static irqreturn_t imx_rxint(int irq, void *dev_id) continue; } - if (uart_handle_sysrq_char - (&sport->port, (unsigned char)rx)) + if (uart_handle_sysrq_char(&sport->port, (unsigned char)rx)) continue; if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) { @@ -471,26 +551,26 @@ static unsigned int imx_tx_empty(struct uart_port *port) */ static unsigned int imx_get_mctrl(struct uart_port *port) { - struct imx_port *sport = (struct imx_port *)port; - unsigned int tmp = TIOCM_DSR | TIOCM_CAR; + struct imx_port *sport = (struct imx_port *)port; + unsigned int tmp = TIOCM_DSR | TIOCM_CAR; - if (readl(sport->port.membase + USR1) & USR1_RTSS) - tmp |= TIOCM_CTS; + if (readl(sport->port.membase + USR1) & USR1_RTSS) + tmp |= TIOCM_CTS; - if (readl(sport->port.membase + UCR2) & UCR2_CTS) - tmp |= TIOCM_RTS; + if (readl(sport->port.membase + UCR2) & UCR2_CTS) + tmp |= TIOCM_RTS; - return tmp; + return tmp; } static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl) { - struct imx_port *sport = (struct imx_port *)port; + struct imx_port *sport = (struct imx_port *)port; unsigned long temp; temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS; - if (mctrl & TIOCM_RTS) + if (mctrl & TIOCM_RTS) temp |= UCR2_CTS; writel(temp, sport->port.membase + UCR2); @@ -534,12 +614,7 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode) if(!ufcr_rfdiv) ufcr_rfdiv = 1; - if(ufcr_rfdiv >= 7) - ufcr_rfdiv = 6; - else - ufcr_rfdiv = 6 - ufcr_rfdiv; - - val |= UFCR_RFDIV & (ufcr_rfdiv << 7); + val |= UFCR_RFDIV_REG(ufcr_rfdiv); writel(val, sport->port.membase + UFCR); @@ -558,8 +633,24 @@ static int imx_startup(struct uart_port *port) * requesting IRQs */ temp = readl(sport->port.membase + UCR4); + + if (USE_IRDA(sport)) + temp |= UCR4_IRSC; + writel(temp & ~UCR4_DREN, sport->port.membase + UCR4); + if (USE_IRDA(sport)) { + /* reset fifo's and state machines */ + int i = 100; + temp = readl(sport->port.membase + UCR2); + temp &= ~UCR2_SRST; + writel(temp, sport->port.membase + UCR2); + while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) && + (--i > 0)) { + udelay(1); + } + } + /* * Allocate the IRQ(s) i.MX1 has three interrupts whereas later * chips only have one interrupt. @@ -575,12 +666,16 @@ static int imx_startup(struct uart_port *port) if (retval) goto error_out2; - retval = request_irq(sport->rtsirq, imx_rtsint, - (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 : - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - DRIVER_NAME, sport); - if (retval) - goto error_out3; + /* do not use RTS IRQ on IrDA */ + if (!USE_IRDA(sport)) { + retval = request_irq(sport->rtsirq, imx_rtsint, + (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 : + IRQF_TRIGGER_FALLING | + IRQF_TRIGGER_RISING, + DRIVER_NAME, sport); + if (retval) + goto error_out3; + } } else { retval = request_irq(sport->port.irq, imx_int, 0, DRIVER_NAME, sport); @@ -597,18 +692,49 @@ static int imx_startup(struct uart_port *port) temp = readl(sport->port.membase + UCR1); temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN; + + if (USE_IRDA(sport)) { + temp |= UCR1_IREN; + temp &= ~(UCR1_RTSDEN); + } + writel(temp, sport->port.membase + UCR1); temp = readl(sport->port.membase + UCR2); temp |= (UCR2_RXEN | UCR2_TXEN); writel(temp, sport->port.membase + UCR2); + if (USE_IRDA(sport)) { + /* clear RX-FIFO */ + int i = 64; + while ((--i > 0) && + (readl(sport->port.membase + URXD0) & URXD_CHARRDY)) { + barrier(); + } + } + #if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3 temp = readl(sport->port.membase + UCR3); temp |= UCR3_RXDMUXSEL; writel(temp, sport->port.membase + UCR3); #endif + if (USE_IRDA(sport)) { + temp = readl(sport->port.membase + UCR4); + if (sport->irda_inv_rx) + temp |= UCR4_INVR; + else + temp &= ~(UCR4_INVR); + writel(temp | UCR4_DREN, sport->port.membase + UCR4); + + temp = readl(sport->port.membase + UCR3); + if (sport->irda_inv_tx) + temp |= UCR3_INVT; + else + temp &= ~(UCR3_INVT); + writel(temp, sport->port.membase + UCR3); + } + /* * Enable modem status interrupts */ @@ -616,6 +742,16 @@ static int imx_startup(struct uart_port *port) imx_enable_ms(&sport->port); spin_unlock_irqrestore(&sport->port.lock,flags); + if (USE_IRDA(sport)) { + struct imxuart_platform_data *pdata; + pdata = sport->port.dev->platform_data; + sport->irda_inv_rx = pdata->irda_inv_rx; + sport->irda_inv_tx = pdata->irda_inv_tx; + sport->trcv_delay = pdata->transceiver_delay; + if (pdata->irda_enable) + pdata->irda_enable(1); + } + return 0; error_out3: @@ -633,6 +769,17 @@ static void imx_shutdown(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long temp; + temp = readl(sport->port.membase + UCR2); + temp &= ~(UCR2_TXEN); + writel(temp, sport->port.membase + UCR2); + + if (USE_IRDA(sport)) { + struct imxuart_platform_data *pdata; + pdata = sport->port.dev->platform_data; + if (pdata->irda_enable) + pdata->irda_enable(0); + } + /* * Stop our timer. */ @@ -642,7 +789,8 @@ static void imx_shutdown(struct uart_port *port) * Free the interrupts */ if (sport->txirq > 0) { - free_irq(sport->rtsirq, sport); + if (!USE_IRDA(sport)) + free_irq(sport->rtsirq, sport); free_irq(sport->txirq, sport); free_irq(sport->rxirq, sport); } else @@ -654,6 +802,9 @@ static void imx_shutdown(struct uart_port *port) temp = readl(sport->port.membase + UCR1); temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); + if (USE_IRDA(sport)) + temp &= ~(UCR1_IREN); + writel(temp, sport->port.membase + UCR1); } @@ -665,7 +816,9 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, unsigned long flags; unsigned int ucr2, old_ucr1, old_txrxen, baud, quot; unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8; - unsigned int div, num, denom, ufcr; + unsigned int div, ufcr; + unsigned long num, denom; + uint64_t tdiv64; /* * If we don't support modem control lines, don't allow @@ -761,38 +914,39 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, sport->port.membase + UCR2); old_txrxen &= (UCR2_TXEN | UCR2_RXEN); - div = sport->port.uartclk / (baud * 16); - if (div > 7) - div = 7; - if (!div) + if (USE_IRDA(sport)) { + /* + * use maximum available submodule frequency to + * avoid missing short pulses due to low sampling rate + */ div = 1; - - num = baud; - denom = port->uartclk / div / 16; - - /* shift num and denom right until they fit into 16 bits */ - while (num > 0x10000 || denom > 0x10000) { - num >>= 1; - denom >>= 1; + } else { + div = sport->port.uartclk / (baud * 16); + if (div > 7) + div = 7; + if (!div) + div = 1; } - if (num > 0) - num -= 1; - if (denom > 0) - denom -= 1; - writel(num, sport->port.membase + UBIR); - writel(denom, sport->port.membase + UBMR); + rational_best_approximation(16 * div * baud, sport->port.uartclk, + 1 << 16, 1 << 16, &num, &denom); - if (div == 7) - div = 6; /* 6 in RFDIV means divide by 7 */ - else - div = 6 - div; + tdiv64 = sport->port.uartclk; + tdiv64 *= num; + do_div(tdiv64, denom * 16 * div); + tty_encode_baud_rate(sport->port.info->port.tty, + (speed_t)tdiv64, (speed_t)tdiv64); + + num -= 1; + denom -= 1; ufcr = readl(sport->port.membase + UFCR); - ufcr = (ufcr & (~UFCR_RFDIV)) | - (div << 7); + ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div); writel(ufcr, sport->port.membase + UFCR); + writel(num, sport->port.membase + UBIR); + writel(denom, sport->port.membase + UBMR); + #ifdef ONEMS writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS); #endif @@ -1072,22 +1226,22 @@ static struct uart_driver imx_reg = { static int serial_imx_suspend(struct platform_device *dev, pm_message_t state) { - struct imx_port *sport = platform_get_drvdata(dev); + struct imx_port *sport = platform_get_drvdata(dev); - if (sport) - uart_suspend_port(&imx_reg, &sport->port); + if (sport) + uart_suspend_port(&imx_reg, &sport->port); - return 0; + return 0; } static int serial_imx_resume(struct platform_device *dev) { - struct imx_port *sport = platform_get_drvdata(dev); + struct imx_port *sport = platform_get_drvdata(dev); - if (sport) - uart_resume_port(&imx_reg, &sport->port); + if (sport) + uart_resume_port(&imx_reg, &sport->port); - return 0; + return 0; } static int serial_imx_probe(struct platform_device *pdev) @@ -1143,19 +1297,29 @@ static int serial_imx_probe(struct platform_device *pdev) imx_ports[pdev->id] = sport; pdata = pdev->dev.platform_data; - if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS)) + if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS)) sport->have_rtscts = 1; +#ifdef CONFIG_IRDA + if (pdata && (pdata->flags & IMXUART_IRDA)) + sport->use_irda = 1; +#endif + if (pdata->init) { ret = pdata->init(pdev); if (ret) goto clkput; } - uart_add_one_port(&imx_reg, &sport->port); + ret = uart_add_one_port(&imx_reg, &sport->port); + if (ret) + goto deinit; platform_set_drvdata(pdev, &sport->port); return 0; +deinit: + if (pdata->exit) + pdata->exit(pdev); clkput: clk_put(sport->clk); clk_disable(sport->clk); @@ -1193,13 +1357,13 @@ static int serial_imx_remove(struct platform_device *pdev) } static struct platform_driver serial_imx_driver = { - .probe = serial_imx_probe, - .remove = serial_imx_remove, + .probe = serial_imx_probe, + .remove = serial_imx_remove, .suspend = serial_imx_suspend, .resume = serial_imx_resume, .driver = { - .name = "imx-uart", + .name = "imx-uart", .owner = THIS_MODULE, }, }; diff --git a/drivers/serial/jsm/jsm.h b/drivers/serial/jsm/jsm.h index c0a3e2734e24..4e5f3bde0461 100644 --- a/drivers/serial/jsm/jsm.h +++ b/drivers/serial/jsm/jsm.h @@ -61,6 +61,7 @@ enum { if ((DBG_##nlevel & jsm_debug)) \ dev_printk(KERN_##klevel, pdev->dev, fmt, ## args) +#define MAXLINES 256 #define MAXPORTS 8 #define MAX_STOPS_SENT 5 diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c index 31496dc0a0d1..107ce2e187b8 100644 --- a/drivers/serial/jsm/jsm_tty.c +++ b/drivers/serial/jsm/jsm_tty.c @@ -33,6 +33,8 @@ #include "jsm.h" +static DECLARE_BITMAP(linemap, MAXLINES); + static void jsm_carrier(struct jsm_channel *ch); static inline int jsm_get_mstat(struct jsm_channel *ch) @@ -433,6 +435,7 @@ int __devinit jsm_tty_init(struct jsm_board *brd) int __devinit jsm_uart_port_init(struct jsm_board *brd) { int i; + unsigned int line; struct jsm_channel *ch; if (!brd) @@ -459,9 +462,15 @@ int __devinit jsm_uart_port_init(struct jsm_board *brd) brd->channels[i]->uart_port.membase = brd->re_map_membase; brd->channels[i]->uart_port.fifosize = 16; brd->channels[i]->uart_port.ops = &jsm_ops; - brd->channels[i]->uart_port.line = brd->channels[i]->ch_portnum + brd->boardnum * 2; + line = find_first_zero_bit(linemap, MAXLINES); + if (line >= MAXLINES) { + printk(KERN_INFO "jsm: linemap is full, added device failed\n"); + continue; + } else + set_bit((int)line, linemap); + brd->channels[i]->uart_port.line = line; if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port)) - printk(KERN_INFO "Added device failed\n"); + printk(KERN_INFO "jsm: add device failed\n"); else printk(KERN_INFO "Added device \n"); } @@ -494,6 +503,7 @@ int jsm_remove_uart_port(struct jsm_board *brd) ch = brd->channels[i]; + clear_bit((int)(ch->uart_port.line), linemap); uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port); } diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c new file mode 100644 index 000000000000..ac9e5d5f742e --- /dev/null +++ b/drivers/serial/timbuart.c @@ -0,0 +1,526 @@ +/* + * timbuart.c timberdale FPGA UART driver + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA UART + */ + +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/serial_core.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/ioport.h> + +#include "timbuart.h" + +struct timbuart_port { + struct uart_port port; + struct tasklet_struct tasklet; + int usedma; + u8 last_ier; + struct platform_device *dev; +}; + +static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800, + 921600, 1843200, 3250000}; + +static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier); + +static irqreturn_t timbuart_handleinterrupt(int irq, void *devid); + +static void timbuart_stop_rx(struct uart_port *port) +{ + /* spin lock held by upper layer, disable all RX interrupts */ + u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS; + iowrite8(ier, port->membase + TIMBUART_IER); +} + +static void timbuart_stop_tx(struct uart_port *port) +{ + /* spinlock held by upper layer, disable TX interrupt */ + u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE; + iowrite8(ier, port->membase + TIMBUART_IER); +} + +static void timbuart_start_tx(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + + /* do not transfer anything here -> fire off the tasklet */ + tasklet_schedule(&uart->tasklet); +} + +static void timbuart_flush_buffer(struct uart_port *port) +{ + u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX; + + iowrite8(ctl, port->membase + TIMBUART_CTRL); + iowrite8(TXBF, port->membase + TIMBUART_ISR); +} + +static void timbuart_rx_chars(struct uart_port *port) +{ + struct tty_struct *tty = port->info->port.tty; + + while (ioread8(port->membase + TIMBUART_ISR) & RXDP) { + u8 ch = ioread8(port->membase + TIMBUART_RXFIFO); + port->icount.rx++; + tty_insert_flip_char(tty, ch, TTY_NORMAL); + } + + spin_unlock(&port->lock); + tty_flip_buffer_push(port->info->port.tty); + spin_lock(&port->lock); + + dev_dbg(port->dev, "%s - total read %d bytes\n", + __func__, port->icount.rx); +} + +static void timbuart_tx_chars(struct uart_port *port) +{ + struct circ_buf *xmit = &port->info->xmit; + + while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) && + !uart_circ_empty(xmit)) { + iowrite8(xmit->buf[xmit->tail], + port->membase + TIMBUART_TXFIFO); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } + + dev_dbg(port->dev, + "%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n", + __func__, + port->icount.tx, + ioread8(port->membase + TIMBUART_CTRL), + port->mctrl & TIOCM_RTS, + ioread8(port->membase + TIMBUART_BAUDRATE)); +} + +static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + struct circ_buf *xmit = &port->info->xmit; + + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) + return; + + if (port->x_char) + return; + + if (isr & TXFLAGS) { + timbuart_tx_chars(port); + /* clear all TX interrupts */ + iowrite8(TXFLAGS, port->membase + TIMBUART_ISR); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + } else + /* Re-enable any tx interrupt */ + *ier |= uart->last_ier & TXFLAGS; + + /* enable interrupts if there are chars in the transmit buffer, + * Or if we delivered some bytes and want the almost empty interrupt + * we wake up the upper layer later when we got the interrupt + * to give it some time to go out... + */ + if (!uart_circ_empty(xmit)) + *ier |= TXBAE; + + dev_dbg(port->dev, "%s - leaving\n", __func__); +} + +void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier) +{ + if (isr & RXFLAGS) { + /* Some RX status is set */ + if (isr & RXBF) { + u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | + TIMBUART_CTRL_FLSHRX; + iowrite8(ctl, port->membase + TIMBUART_CTRL); + port->icount.overrun++; + } else if (isr & (RXDP)) + timbuart_rx_chars(port); + + /* ack all RX interrupts */ + iowrite8(RXFLAGS, port->membase + TIMBUART_ISR); + } + + /* always have the RX interrupts enabled */ + *ier |= RXBAF | RXBF | RXTT; + + dev_dbg(port->dev, "%s - leaving\n", __func__); +} + +void timbuart_tasklet(unsigned long arg) +{ + struct timbuart_port *uart = (struct timbuart_port *)arg; + u8 isr, ier = 0; + + spin_lock(&uart->port.lock); + + isr = ioread8(uart->port.membase + TIMBUART_ISR); + dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr); + + if (!uart->usedma) + timbuart_handle_tx_port(&uart->port, isr, &ier); + + timbuart_mctrl_check(&uart->port, isr, &ier); + + if (!uart->usedma) + timbuart_handle_rx_port(&uart->port, isr, &ier); + + iowrite8(ier, uart->port.membase + TIMBUART_IER); + + spin_unlock(&uart->port.lock); + dev_dbg(uart->port.dev, "%s leaving\n", __func__); +} + +static unsigned int timbuart_tx_empty(struct uart_port *port) +{ + u8 isr = ioread8(port->membase + TIMBUART_ISR); + + return (isr & TXBAE) ? TIOCSER_TEMT : 0; +} + +static unsigned int timbuart_get_mctrl(struct uart_port *port) +{ + u8 cts = ioread8(port->membase + TIMBUART_CTRL); + dev_dbg(port->dev, "%s - cts %x\n", __func__, cts); + + if (cts & TIMBUART_CTRL_CTS) + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; + else + return TIOCM_DSR | TIOCM_CAR; +} + +static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + dev_dbg(port->dev, "%s - %x\n", __func__, mctrl); + + if (mctrl & TIOCM_RTS) + iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL); + else + iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL); +} + +static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier) +{ + unsigned int cts; + + if (isr & CTS_DELTA) { + /* ack */ + iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR); + cts = timbuart_get_mctrl(port); + uart_handle_cts_change(port, cts & TIOCM_CTS); + wake_up_interruptible(&port->info->delta_msr_wait); + } + + *ier |= CTS_DELTA; +} + +static void timbuart_enable_ms(struct uart_port *port) +{ + /* N/A */ +} + +static void timbuart_break_ctl(struct uart_port *port, int ctl) +{ + /* N/A */ +} + +static int timbuart_startup(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + + dev_dbg(port->dev, "%s\n", __func__); + + iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL); + iowrite8(0xff, port->membase + TIMBUART_ISR); + /* Enable all but TX interrupts */ + iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA, + port->membase + TIMBUART_IER); + + return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED, + "timb-uart", uart); +} + +static void timbuart_shutdown(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + dev_dbg(port->dev, "%s\n", __func__); + free_irq(port->irq, uart); + iowrite8(0, port->membase + TIMBUART_IER); +} + +static int get_bindex(int baud) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(baudrates); i++) + if (baud <= baudrates[i]) + return i; + + return -1; +} + +static void timbuart_set_termios(struct uart_port *port, + struct ktermios *termios, + struct ktermios *old) +{ + unsigned int baud; + short bindex; + unsigned long flags; + + baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16); + bindex = get_bindex(baud); + dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex); + + if (bindex < 0) + bindex = 0; + baud = baudrates[bindex]; + + /* The serial layer calls into this once with old = NULL when setting + up initially */ + if (old) + tty_termios_copy_hw(termios, old); + tty_termios_encode_baud_rate(termios, baud, baud); + + spin_lock_irqsave(&port->lock, flags); + iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE); + uart_update_timeout(port, termios->c_cflag, baud); + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *timbuart_type(struct uart_port *port) +{ + return port->type == PORT_UNKNOWN ? "timbuart" : NULL; +} + +/* We do not request/release mappings of the registers here, + * currently it's done in the proble function. + */ +static void timbuart_release_port(struct uart_port *port) +{ + struct platform_device *pdev = to_platform_device(port->dev); + int size = + resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0)); + + if (port->flags & UPF_IOREMAP) { + iounmap(port->membase); + port->membase = NULL; + } + + release_mem_region(port->mapbase, size); +} + +static int timbuart_request_port(struct uart_port *port) +{ + struct platform_device *pdev = to_platform_device(port->dev); + int size = + resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0)); + + if (!request_mem_region(port->mapbase, size, "timb-uart")) + return -EBUSY; + + if (port->flags & UPF_IOREMAP) { + port->membase = ioremap(port->mapbase, size); + if (port->membase == NULL) { + release_mem_region(port->mapbase, size); + return -ENOMEM; + } + } + + return 0; +} + +static irqreturn_t timbuart_handleinterrupt(int irq, void *devid) +{ + struct timbuart_port *uart = (struct timbuart_port *)devid; + + if (ioread8(uart->port.membase + TIMBUART_IPR)) { + uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER); + + /* disable interrupts, the tasklet enables them again */ + iowrite8(0, uart->port.membase + TIMBUART_IER); + + /* fire off bottom half */ + tasklet_schedule(&uart->tasklet); + + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + +/* + * Configure/autoconfigure the port. + */ +static void timbuart_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) { + port->type = PORT_TIMBUART; + timbuart_request_port(port); + } +} + +static int timbuart_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + /* we don't want the core code to modify any port params */ + return -EINVAL; +} + +static struct uart_ops timbuart_ops = { + .tx_empty = timbuart_tx_empty, + .set_mctrl = timbuart_set_mctrl, + .get_mctrl = timbuart_get_mctrl, + .stop_tx = timbuart_stop_tx, + .start_tx = timbuart_start_tx, + .flush_buffer = timbuart_flush_buffer, + .stop_rx = timbuart_stop_rx, + .enable_ms = timbuart_enable_ms, + .break_ctl = timbuart_break_ctl, + .startup = timbuart_startup, + .shutdown = timbuart_shutdown, + .set_termios = timbuart_set_termios, + .type = timbuart_type, + .release_port = timbuart_release_port, + .request_port = timbuart_request_port, + .config_port = timbuart_config_port, + .verify_port = timbuart_verify_port +}; + +static struct uart_driver timbuart_driver = { + .owner = THIS_MODULE, + .driver_name = "timberdale_uart", + .dev_name = "ttyTU", + .major = TIMBUART_MAJOR, + .minor = TIMBUART_MINOR, + .nr = 1 +}; + +static int timbuart_probe(struct platform_device *dev) +{ + int err; + struct timbuart_port *uart; + struct resource *iomem; + + dev_dbg(&dev->dev, "%s\n", __func__); + + uart = kzalloc(sizeof(*uart), GFP_KERNEL); + if (!uart) { + err = -EINVAL; + goto err_mem; + } + + uart->usedma = 0; + + uart->port.uartclk = 3250000 * 16; + uart->port.fifosize = TIMBUART_FIFO_SIZE; + uart->port.regshift = 2; + uart->port.iotype = UPIO_MEM; + uart->port.ops = &timbuart_ops; + uart->port.irq = 0; + uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP; + uart->port.line = 0; + uart->port.dev = &dev->dev; + + iomem = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!iomem) { + err = -ENOMEM; + goto err_register; + } + uart->port.mapbase = iomem->start; + uart->port.membase = NULL; + + uart->port.irq = platform_get_irq(dev, 0); + if (uart->port.irq < 0) { + err = -EINVAL; + goto err_register; + } + + tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart); + + err = uart_register_driver(&timbuart_driver); + if (err) + goto err_register; + + err = uart_add_one_port(&timbuart_driver, &uart->port); + if (err) + goto err_add_port; + + platform_set_drvdata(dev, uart); + + return 0; + +err_add_port: + uart_unregister_driver(&timbuart_driver); +err_register: + kfree(uart); +err_mem: + printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n", + err); + + return err; +} + +static int timbuart_remove(struct platform_device *dev) +{ + struct timbuart_port *uart = platform_get_drvdata(dev); + + tasklet_kill(&uart->tasklet); + uart_remove_one_port(&timbuart_driver, &uart->port); + uart_unregister_driver(&timbuart_driver); + kfree(uart); + + return 0; +} + +static struct platform_driver timbuart_platform_driver = { + .driver = { + .name = "timb-uart", + .owner = THIS_MODULE, + }, + .probe = timbuart_probe, + .remove = timbuart_remove, +}; + +/*--------------------------------------------------------------------------*/ + +static int __init timbuart_init(void) +{ + return platform_driver_register(&timbuart_platform_driver); +} + +static void __exit timbuart_exit(void) +{ + platform_driver_unregister(&timbuart_platform_driver); +} + +module_init(timbuart_init); +module_exit(timbuart_exit); + +MODULE_DESCRIPTION("Timberdale UART driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:timb-uart"); + diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h new file mode 100644 index 000000000000..7e566766bc43 --- /dev/null +++ b/drivers/serial/timbuart.h @@ -0,0 +1,58 @@ +/* + * timbuart.c timberdale FPGA GPIO driver + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA UART + */ + +#ifndef _TIMBUART_H +#define _TIMBUART_H + +#define TIMBUART_FIFO_SIZE 2048 + +#define TIMBUART_RXFIFO 0x08 +#define TIMBUART_TXFIFO 0x0c +#define TIMBUART_IER 0x10 +#define TIMBUART_IPR 0x14 +#define TIMBUART_ISR 0x18 +#define TIMBUART_CTRL 0x1c +#define TIMBUART_BAUDRATE 0x20 + +#define TIMBUART_CTRL_RTS 0x01 +#define TIMBUART_CTRL_CTS 0x02 +#define TIMBUART_CTRL_FLSHTX 0x40 +#define TIMBUART_CTRL_FLSHRX 0x80 + +#define TXBF 0x01 +#define TXBAE 0x02 +#define CTS_DELTA 0x04 +#define RXDP 0x08 +#define RXBAF 0x10 +#define RXBF 0x20 +#define RXTT 0x40 +#define RXBNAE 0x80 +#define TXBE 0x100 + +#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE) +#define TXFLAGS (TXBF | TXBAE) + +#define TIMBUART_MAJOR 204 +#define TIMBUART_MINOR 192 + +#endif /* _TIMBUART_H */ + diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7a1164dd1d37..ddeb69192537 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -16,7 +16,8 @@ * v0.9 - thorough cleaning, URBification, almost a rewrite * v0.10 - some more cleanups * v0.11 - fixed flow control, read error doesn't stop reads - * v0.12 - added TIOCM ioctls, added break handling, made struct acm kmalloced + * v0.12 - added TIOCM ioctls, added break handling, made struct acm + * kmalloced * v0.13 - added termios, added hangup * v0.14 - sized down struct acm * v0.15 - fixed flow control again - characters could be lost @@ -62,7 +63,7 @@ #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/mutex.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <asm/byteorder.h> @@ -87,7 +88,10 @@ static struct acm *acm_table[ACM_TTY_MINORS]; static DEFINE_MUTEX(open_mutex); -#define ACM_READY(acm) (acm && acm->dev && acm->used) +#define ACM_READY(acm) (acm && acm->dev && acm->port.count) + +static const struct tty_port_operations acm_port_ops = { +}; #ifdef VERBOSE_DEBUG #define verbose 1 @@ -99,13 +103,15 @@ static DEFINE_MUTEX(open_mutex); * Functions for ACM control messages. */ -static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len) +static int acm_ctrl_msg(struct acm *acm, int request, int value, + void *buf, int len) { int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0), request, USB_RT_ACM, value, acm->control->altsetting[0].desc.bInterfaceNumber, buf, len, 5000); - dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", request, value, len, retval); + dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", + request, value, len, retval); return retval < 0 ? retval : 0; } @@ -150,9 +156,8 @@ static int acm_wb_is_avail(struct acm *acm) n = ACM_NW; spin_lock_irqsave(&acm->write_lock, flags); - for (i = 0; i < ACM_NW; i++) { + for (i = 0; i < ACM_NW; i++) n -= acm->wb[i].use; - } spin_unlock_irqrestore(&acm->write_lock, flags); return n; } @@ -183,7 +188,8 @@ static int acm_start_wb(struct acm *acm, struct acm_wb *wb) wb->urb->transfer_buffer_length = wb->len; wb->urb->dev = acm->dev; - if ((rc = usb_submit_urb(wb->urb, GFP_ATOMIC)) < 0) { + rc = usb_submit_urb(wb->urb, GFP_ATOMIC); + if (rc < 0) { dbg("usb_submit_urb(write bulk) failed: %d", rc); acm_write_done(acm, wb); } @@ -262,6 +268,7 @@ static void acm_ctrl_irq(struct urb *urb) { struct acm *acm = urb->context; struct usb_cdc_notification *dr = urb->transfer_buffer; + struct tty_struct *tty; unsigned char *data; int newctrl; int retval; @@ -287,40 +294,45 @@ static void acm_ctrl_irq(struct urb *urb) data = (unsigned char *)(dr + 1); switch (dr->bNotificationType) { + case USB_CDC_NOTIFY_NETWORK_CONNECTION: + dbg("%s network", dr->wValue ? + "connected to" : "disconnected from"); + break; - case USB_CDC_NOTIFY_NETWORK_CONNECTION: - - dbg("%s network", dr->wValue ? "connected to" : "disconnected from"); - break; - - case USB_CDC_NOTIFY_SERIAL_STATE: - - newctrl = get_unaligned_le16(data); + case USB_CDC_NOTIFY_SERIAL_STATE: + tty = tty_port_tty_get(&acm->port); + newctrl = get_unaligned_le16(data); - if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { + if (tty) { + if (!acm->clocal && + (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { dbg("calling hangup"); - tty_hangup(acm->tty); + tty_hangup(tty); } + tty_kref_put(tty); + } - acm->ctrlin = newctrl; - - dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c", - acm->ctrlin & ACM_CTRL_DCD ? '+' : '-', acm->ctrlin & ACM_CTRL_DSR ? '+' : '-', - acm->ctrlin & ACM_CTRL_BRK ? '+' : '-', acm->ctrlin & ACM_CTRL_RI ? '+' : '-', - acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-', acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-', - acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-'); + acm->ctrlin = newctrl; + dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c", + acm->ctrlin & ACM_CTRL_DCD ? '+' : '-', + acm->ctrlin & ACM_CTRL_DSR ? '+' : '-', + acm->ctrlin & ACM_CTRL_BRK ? '+' : '-', + acm->ctrlin & ACM_CTRL_RI ? '+' : '-', + acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-', + acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-', + acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-'); break; - default: - dbg("unknown notification %d received: index %d len %d data0 %d data1 %d", - dr->bNotificationType, dr->wIndex, - dr->wLength, data[0], data[1]); - break; + default: + dbg("unknown notification %d received: index %d len %d data0 %d data1 %d", + dr->bNotificationType, dr->wIndex, + dr->wLength, data[0], data[1]); + break; } exit: usb_mark_last_busy(acm->dev); - retval = usb_submit_urb (urb, GFP_ATOMIC); + retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with " "result %d", __func__, retval); @@ -371,15 +383,14 @@ static void acm_rx_tasklet(unsigned long _acm) { struct acm *acm = (void *)_acm; struct acm_rb *buf; - struct tty_struct *tty = acm->tty; + struct tty_struct *tty; struct acm_ru *rcv; unsigned long flags; unsigned char throttled; dbg("Entering acm_rx_tasklet"); - if (!ACM_READY(acm)) - { + if (!ACM_READY(acm)) { dbg("acm_rx_tasklet: ACM not ready"); return; } @@ -387,12 +398,13 @@ static void acm_rx_tasklet(unsigned long _acm) spin_lock_irqsave(&acm->throttle_lock, flags); throttled = acm->throttle; spin_unlock_irqrestore(&acm->throttle_lock, flags); - if (throttled) - { + if (throttled) { dbg("acm_rx_tasklet: throttled"); return; } + tty = tty_port_tty_get(&acm->port); + next_buffer: spin_lock_irqsave(&acm->read_lock, flags); if (list_empty(&acm->filled_read_bufs)) { @@ -406,20 +418,22 @@ next_buffer: dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size); - tty_buffer_request_room(tty, buf->size); - spin_lock_irqsave(&acm->throttle_lock, flags); - throttled = acm->throttle; - spin_unlock_irqrestore(&acm->throttle_lock, flags); - if (!throttled) - tty_insert_flip_string(tty, buf->base, buf->size); - tty_flip_buffer_push(tty); - - if (throttled) { - dbg("Throttling noticed"); - spin_lock_irqsave(&acm->read_lock, flags); - list_add(&buf->list, &acm->filled_read_bufs); - spin_unlock_irqrestore(&acm->read_lock, flags); - return; + if (tty) { + spin_lock_irqsave(&acm->throttle_lock, flags); + throttled = acm->throttle; + spin_unlock_irqrestore(&acm->throttle_lock, flags); + if (!throttled) { + tty_buffer_request_room(tty, buf->size); + tty_insert_flip_string(tty, buf->base, buf->size); + tty_flip_buffer_push(tty); + } else { + tty_kref_put(tty); + dbg("Throttling noticed"); + spin_lock_irqsave(&acm->read_lock, flags); + list_add(&buf->list, &acm->filled_read_bufs); + spin_unlock_irqrestore(&acm->read_lock, flags); + return; + } } spin_lock_irqsave(&acm->read_lock, flags); @@ -428,6 +442,8 @@ next_buffer: goto next_buffer; urbs: + tty_kref_put(tty); + while (!list_empty(&acm->spare_read_bufs)) { spin_lock_irqsave(&acm->read_lock, flags); if (list_empty(&acm->spare_read_urbs)) { @@ -454,10 +470,11 @@ urbs: rcv->urb->transfer_dma = buf->dma; rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; - /* This shouldn't kill the driver as unsuccessful URBs are returned to the - free-urbs-pool and resubmited ASAP */ + /* This shouldn't kill the driver as unsuccessful URBs are + returned to the free-urbs-pool and resubmited ASAP */ spin_lock_irqsave(&acm->read_lock, flags); - if (acm->susp_count || usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) { + if (acm->susp_count || + usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) { list_add(&buf->list, &acm->spare_read_bufs); list_add(&rcv->list, &acm->spare_read_urbs); acm->processing = 0; @@ -499,11 +516,14 @@ static void acm_write_bulk(struct urb *urb) static void acm_softint(struct work_struct *work) { struct acm *acm = container_of(work, struct acm, work); + struct tty_struct *tty; dev_vdbg(&acm->data->dev, "tx work\n"); if (!ACM_READY(acm)) return; - tty_wakeup(acm->tty); + tty = tty_port_tty_get(&acm->port); + tty_wakeup(tty); + tty_kref_put(tty); } static void acm_waker(struct work_struct *waker) @@ -543,8 +563,9 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp) rv = 0; set_bit(TTY_NO_WRITE_SPLIT, &tty->flags); + tty->driver_data = acm; - acm->tty = tty; + tty_port_tty_set(&acm->port, tty); if (usb_autopm_get_interface(acm->control) < 0) goto early_bail; @@ -552,11 +573,10 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp) acm->control->needs_remote_wakeup = 1; mutex_lock(&acm->mutex); - if (acm->used++) { + if (acm->port.count++) { usb_autopm_put_interface(acm->control); goto done; - } - + } acm->ctrlurb->dev = acm->dev; if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) { @@ -567,22 +587,22 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp) if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) && (acm->ctrl_caps & USB_CDC_CAP_LINE)) goto full_bailout; + usb_autopm_put_interface(acm->control); INIT_LIST_HEAD(&acm->spare_read_urbs); INIT_LIST_HEAD(&acm->spare_read_bufs); INIT_LIST_HEAD(&acm->filled_read_bufs); - for (i = 0; i < acm->rx_buflimit; i++) { + + for (i = 0; i < acm->rx_buflimit; i++) list_add(&(acm->ru[i].list), &acm->spare_read_urbs); - } - for (i = 0; i < acm->rx_buflimit; i++) { + for (i = 0; i < acm->rx_buflimit; i++) list_add(&(acm->rb[i].list), &acm->spare_read_bufs); - } acm->throttle = 0; tasklet_schedule(&acm->urb_task); - + rv = tty_port_block_til_ready(&acm->port, tty, filp); done: mutex_unlock(&acm->mutex); err_out: @@ -593,16 +613,17 @@ full_bailout: usb_kill_urb(acm->ctrlurb); bail_out: usb_autopm_put_interface(acm->control); - acm->used--; + acm->port.count--; mutex_unlock(&acm->mutex); early_bail: mutex_unlock(&open_mutex); + tty_port_tty_set(&acm->port, NULL); return -EIO; } static void acm_tty_unregister(struct acm *acm) { - int i,nr; + int i, nr; nr = acm->rx_buflimit; tty_unregister_device(acm_tty_driver, acm->minor); @@ -619,41 +640,56 @@ static void acm_tty_unregister(struct acm *acm) static int acm_tty_chars_in_buffer(struct tty_struct *tty); +static void acm_port_down(struct acm *acm, int drain) +{ + int i, nr = acm->rx_buflimit; + mutex_lock(&open_mutex); + if (acm->dev) { + usb_autopm_get_interface(acm->control); + acm_set_control(acm, acm->ctrlout = 0); + /* try letting the last writes drain naturally */ + if (drain) { + wait_event_interruptible_timeout(acm->drain_wait, + (ACM_NW == acm_wb_is_avail(acm)) || !acm->dev, + ACM_CLOSE_TIMEOUT * HZ); + } + usb_kill_urb(acm->ctrlurb); + for (i = 0; i < ACM_NW; i++) + usb_kill_urb(acm->wb[i].urb); + for (i = 0; i < nr; i++) + usb_kill_urb(acm->ru[i].urb); + acm->control->needs_remote_wakeup = 0; + usb_autopm_put_interface(acm->control); + } + mutex_unlock(&open_mutex); +} + +static void acm_tty_hangup(struct tty_struct *tty) +{ + struct acm *acm = tty->driver_data; + tty_port_hangup(&acm->port); + acm_port_down(acm, 0); +} + static void acm_tty_close(struct tty_struct *tty, struct file *filp) { struct acm *acm = tty->driver_data; - int i,nr; - if (!acm || !acm->used) + /* Perform the closing process and see if we need to do the hardware + shutdown */ + if (tty_port_close_start(&acm->port, tty, filp) == 0) return; - - nr = acm->rx_buflimit; + acm_port_down(acm, 0); + tty_port_close_end(&acm->port, tty); mutex_lock(&open_mutex); - if (!--acm->used) { - if (acm->dev) { - usb_autopm_get_interface(acm->control); - acm_set_control(acm, acm->ctrlout = 0); - - /* try letting the last writes drain naturally */ - wait_event_interruptible_timeout(acm->drain_wait, - (ACM_NW == acm_wb_is_avail(acm)) - || !acm->dev, - ACM_CLOSE_TIMEOUT * HZ); - - usb_kill_urb(acm->ctrlurb); - for (i = 0; i < ACM_NW; i++) - usb_kill_urb(acm->wb[i].urb); - for (i = 0; i < nr; i++) - usb_kill_urb(acm->ru[i].urb); - acm->control->needs_remote_wakeup = 0; - usb_autopm_put_interface(acm->control); - } else - acm_tty_unregister(acm); - } + tty_port_tty_set(&acm->port, NULL); + if (!acm->dev) + acm_tty_unregister(acm); mutex_unlock(&open_mutex); } -static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) +static int acm_tty_write(struct tty_struct *tty, + const unsigned char *buf, int count) { struct acm *acm = tty->driver_data; int stat; @@ -669,7 +705,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c return 0; spin_lock_irqsave(&acm->write_lock, flags); - if ((wbn = acm_wb_alloc(acm)) < 0) { + wbn = acm_wb_alloc(acm); + if (wbn < 0) { spin_unlock_irqrestore(&acm->write_lock, flags); return 0; } @@ -681,7 +718,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c wb->len = count; spin_unlock_irqrestore(&acm->write_lock, flags); - if ((stat = acm_write_start(acm, wbn)) < 0) + stat = acm_write_start(acm, wbn); + if (stat < 0) return stat; return count; } @@ -767,8 +805,10 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file, return -EINVAL; newctrl = acm->ctrlout; - set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (set & TIOCM_RTS ? ACM_CTRL_RTS : 0); - clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0); + set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | + (set & TIOCM_RTS ? ACM_CTRL_RTS : 0); + clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | + (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0); newctrl = (newctrl & ~clear) | set; @@ -777,7 +817,8 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file, return acm_set_control(acm, acm->ctrlout = newctrl); } -static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) +static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) { struct acm *acm = tty->driver_data; @@ -799,7 +840,8 @@ static const __u8 acm_tty_size[] = { 5, 6, 7, 8 }; -static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios_old) +static void acm_tty_set_termios(struct tty_struct *tty, + struct ktermios *termios_old) { struct acm *acm = tty->driver_data; struct ktermios *termios = tty->termios; @@ -809,19 +851,23 @@ static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios if (!ACM_READY(acm)) return; + /* FIXME: Needs to support the tty_baud interface */ + /* FIXME: Broken on sparc */ newline.dwDTERate = cpu_to_le32p(acm_tty_speed + (termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0)); newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0; newline.bParityType = termios->c_cflag & PARENB ? - (termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; + (termios->c_cflag & PARODD ? 1 : 2) + + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4]; - + /* FIXME: Needs to clear unsupported bits in the termios */ acm->clocal = ((termios->c_cflag & CLOCAL) != 0); if (!newline.dwDTERate) { newline.dwDTERate = acm->line.dwDTERate; newctrl &= ~ACM_CTRL_DTR; - } else newctrl |= ACM_CTRL_DTR; + } else + newctrl |= ACM_CTRL_DTR; if (newctrl != acm->ctrlout) acm_set_control(acm, acm->ctrlout = newctrl); @@ -846,9 +892,8 @@ static void acm_write_buffers_free(struct acm *acm) struct acm_wb *wb; struct usb_device *usb_dev = interface_to_usbdev(acm->control); - for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) { + for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah); - } } static void acm_read_buffers_free(struct acm *acm) @@ -857,7 +902,8 @@ static void acm_read_buffers_free(struct acm *acm) int i, n = acm->rx_buflimit; for (i = 0; i < n; i++) - usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma); + usb_buffer_free(usb_dev, acm->readsize, + acm->rb[i].base, acm->rb[i].dma); } /* Little helper: write buffers allocate */ @@ -882,8 +928,8 @@ static int acm_write_buffers_alloc(struct acm *acm) return 0; } -static int acm_probe (struct usb_interface *intf, - const struct usb_device_id *id) +static int acm_probe(struct usb_interface *intf, + const struct usb_device_id *id) { struct usb_cdc_union_desc *union_header = NULL; struct usb_cdc_country_functional_desc *cfd = NULL; @@ -897,7 +943,7 @@ static int acm_probe (struct usb_interface *intf, struct usb_device *usb_dev = interface_to_usbdev(intf); struct acm *acm; int minor; - int ctrlsize,readsize; + int ctrlsize, readsize; u8 *buf; u8 ac_management_function = 0; u8 call_management_function = 0; @@ -917,7 +963,7 @@ static int acm_probe (struct usb_interface *intf, control_interface = usb_ifnum_to_if(usb_dev, 0); goto skip_normal_probe; } - + /* normal probing*/ if (!buffer) { dev_err(&intf->dev, "Weird descriptor references\n"); @@ -925,8 +971,10 @@ static int acm_probe (struct usb_interface *intf, } if (!buflen) { - if (intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) { - dev_dbg(&intf->dev,"Seeking extra descriptors on endpoint\n"); + if (intf->cur_altsetting->endpoint->extralen && + intf->cur_altsetting->endpoint->extra) { + dev_dbg(&intf->dev, + "Seeking extra descriptors on endpoint\n"); buflen = intf->cur_altsetting->endpoint->extralen; buffer = intf->cur_altsetting->endpoint->extra; } else { @@ -937,47 +985,43 @@ static int acm_probe (struct usb_interface *intf, } while (buflen > 0) { - if (buffer [1] != USB_DT_CS_INTERFACE) { + if (buffer[1] != USB_DT_CS_INTERFACE) { dev_err(&intf->dev, "skipping garbage\n"); goto next_desc; } - switch (buffer [2]) { - case USB_CDC_UNION_TYPE: /* we've found it */ - if (union_header) { - dev_err(&intf->dev, "More than one " - "union descriptor, " - "skipping ...\n"); - goto next_desc; - } - union_header = (struct usb_cdc_union_desc *) - buffer; - break; - case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/ - cfd = (struct usb_cdc_country_functional_desc *)buffer; - break; - case USB_CDC_HEADER_TYPE: /* maybe check version */ - break; /* for now we ignore it */ - case USB_CDC_ACM_TYPE: - ac_management_function = buffer[3]; - break; - case USB_CDC_CALL_MANAGEMENT_TYPE: - call_management_function = buffer[3]; - call_interface_num = buffer[4]; - if ((call_management_function & 3) != 3) - dev_err(&intf->dev, "This device " - "cannot do calls on its own. " - "It is no modem.\n"); - break; - default: - /* there are LOTS more CDC descriptors that - * could legitimately be found here. - */ - dev_dbg(&intf->dev, "Ignoring descriptor: " - "type %02x, length %d\n", - buffer[2], buffer[0]); - break; + switch (buffer[2]) { + case USB_CDC_UNION_TYPE: /* we've found it */ + if (union_header) { + dev_err(&intf->dev, "More than one " + "union descriptor, skipping ...\n"); + goto next_desc; } + union_header = (struct usb_cdc_union_desc *)buffer; + break; + case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/ + cfd = (struct usb_cdc_country_functional_desc *)buffer; + break; + case USB_CDC_HEADER_TYPE: /* maybe check version */ + break; /* for now we ignore it */ + case USB_CDC_ACM_TYPE: + ac_management_function = buffer[3]; + break; + case USB_CDC_CALL_MANAGEMENT_TYPE: + call_management_function = buffer[3]; + call_interface_num = buffer[4]; + if ((call_management_function & 3) != 3) + dev_err(&intf->dev, "This device cannot do calls on its own. It is not a modem.\n"); + break; + default: + /* there are LOTS more CDC descriptors that + * could legitimately be found here. + */ + dev_dbg(&intf->dev, "Ignoring descriptor: " + "type %02x, length %d\n", + buffer[2], buffer[0]); + break; + } next_desc: buflen -= buffer[0]; buffer += buffer[0]; @@ -985,33 +1029,36 @@ next_desc: if (!union_header) { if (call_interface_num > 0) { - dev_dbg(&intf->dev,"No union descriptor, using call management descriptor\n"); + dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n"); data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num)); control_interface = intf; } else { - dev_dbg(&intf->dev,"No union descriptor, giving up\n"); + dev_dbg(&intf->dev, + "No union descriptor, giving up\n"); return -ENODEV; } } else { control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0); data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0)); if (!control_interface || !data_interface) { - dev_dbg(&intf->dev,"no interfaces\n"); + dev_dbg(&intf->dev, "no interfaces\n"); return -ENODEV; } } - + if (data_interface_num != call_interface_num) - dev_dbg(&intf->dev,"Separate call control interface. That is not fully supported.\n"); + dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n"); skip_normal_probe: /*workaround for switched interfaces */ - if (data_interface->cur_altsetting->desc.bInterfaceClass != CDC_DATA_INTERFACE_TYPE) { - if (control_interface->cur_altsetting->desc.bInterfaceClass == CDC_DATA_INTERFACE_TYPE) { + if (data_interface->cur_altsetting->desc.bInterfaceClass + != CDC_DATA_INTERFACE_TYPE) { + if (control_interface->cur_altsetting->desc.bInterfaceClass + == CDC_DATA_INTERFACE_TYPE) { struct usb_interface *t; - dev_dbg(&intf->dev,"Your device has switched interfaces.\n"); - + dev_dbg(&intf->dev, + "Your device has switched interfaces.\n"); t = control_interface; control_interface = data_interface; data_interface = t; @@ -1023,9 +1070,9 @@ skip_normal_probe: /* Accept probe requests only for the control interface */ if (intf != control_interface) return -ENODEV; - + if (usb_interface_claimed(data_interface)) { /* valid in this context */ - dev_dbg(&intf->dev,"The data interface isn't available\n"); + dev_dbg(&intf->dev, "The data interface isn't available\n"); return -EBUSY; } @@ -1042,8 +1089,8 @@ skip_normal_probe: if (!usb_endpoint_dir_in(epread)) { /* descriptors are swapped */ struct usb_endpoint_descriptor *t; - dev_dbg(&intf->dev,"The data interface has switched endpoints\n"); - + dev_dbg(&intf->dev, + "The data interface has switched endpoints\n"); t = epread; epread = epwrite; epwrite = t; @@ -1056,13 +1103,15 @@ skip_normal_probe: return -ENODEV; } - if (!(acm = kzalloc(sizeof(struct acm), GFP_KERNEL))) { + acm = kzalloc(sizeof(struct acm), GFP_KERNEL); + if (acm == NULL) { dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n"); goto alloc_fail; } ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize); - readsize = le16_to_cpu(epread->wMaxPacketSize)* ( quirks == SINGLE_RX_URB ? 1 : 2); + readsize = le16_to_cpu(epread->wMaxPacketSize) * + (quirks == SINGLE_RX_URB ? 1 : 2); acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20; acm->control = control_interface; acm->data = data_interface; @@ -1082,6 +1131,8 @@ skip_normal_probe: spin_lock_init(&acm->read_lock); mutex_init(&acm->mutex); acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); + tty_port_init(&acm->port); + acm->port.ops = &acm_port_ops; buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma); if (!buf) { @@ -1103,8 +1154,10 @@ skip_normal_probe: for (i = 0; i < num_rx_buf; i++) { struct acm_ru *rcv = &(acm->ru[i]); - if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) { - dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n"); + rcv->urb = usb_alloc_urb(0, GFP_KERNEL); + if (rcv->urb == NULL) { + dev_dbg(&intf->dev, + "out of memory (read urbs usb_alloc_urb)\n"); goto alloc_fail7; } @@ -1117,26 +1170,29 @@ skip_normal_probe: rb->base = usb_buffer_alloc(acm->dev, readsize, GFP_KERNEL, &rb->dma); if (!rb->base) { - dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n"); + dev_dbg(&intf->dev, + "out of memory (read bufs usb_buffer_alloc)\n"); goto alloc_fail7; } } - for(i = 0; i < ACM_NW; i++) - { + for (i = 0; i < ACM_NW; i++) { struct acm_wb *snd = &(acm->wb[i]); - if (!(snd->urb = usb_alloc_urb(0, GFP_KERNEL))) { - dev_dbg(&intf->dev, "out of memory (write urbs usb_alloc_urb)"); + snd->urb = usb_alloc_urb(0, GFP_KERNEL); + if (snd->urb == NULL) { + dev_dbg(&intf->dev, + "out of memory (write urbs usb_alloc_urb)"); goto alloc_fail7; } - usb_fill_bulk_urb(snd->urb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), - NULL, acm->writesize, acm_write_bulk, snd); + usb_fill_bulk_urb(snd->urb, usb_dev, + usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), + NULL, acm->writesize, acm_write_bulk, snd); snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; snd->instance = acm; } - usb_set_intfdata (intf, acm); + usb_set_intfdata(intf, acm); i = device_create_file(&intf->dev, &dev_attr_bmCapabilities); if (i < 0) @@ -1147,7 +1203,8 @@ skip_normal_probe: if (!acm->country_codes) goto skip_countries; acm->country_code_size = cfd->bLength - 4; - memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4); + memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, + cfd->bLength - 4); acm->country_rel_date = cfd->iCountryCodeRelDate; i = device_create_file(&intf->dev, &dev_attr_wCountryCodes); @@ -1156,7 +1213,8 @@ skip_normal_probe: goto skip_countries; } - i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate); + i = device_create_file(&intf->dev, + &dev_attr_iCountryCodeRelDate); if (i < 0) { kfree(acm->country_codes); goto skip_countries; @@ -1164,8 +1222,10 @@ skip_normal_probe: } skip_countries: - usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress), - acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, epctrl->bInterval); + usb_fill_int_urb(acm->ctrlurb, usb_dev, + usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress), + acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, + epctrl->bInterval); acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; acm->ctrlurb->transfer_dma = acm->ctrl_dma; @@ -1212,7 +1272,7 @@ static void stop_data_traffic(struct acm *acm) tasklet_disable(&acm->urb_task); usb_kill_urb(acm->ctrlurb); - for(i = 0; i < ACM_NW; i++) + for (i = 0; i < ACM_NW; i++) usb_kill_urb(acm->wb[i].urb); for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->ru[i].urb); @@ -1227,13 +1287,14 @@ static void acm_disconnect(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct usb_device *usb_dev = interface_to_usbdev(intf); + struct tty_struct *tty; /* sibling interface is already cleaning up */ if (!acm) return; mutex_lock(&open_mutex); - if (acm->country_codes){ + if (acm->country_codes) { device_remove_file(&acm->control->dev, &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, @@ -1247,22 +1308,25 @@ static void acm_disconnect(struct usb_interface *intf) stop_data_traffic(acm); acm_write_buffers_free(acm); - usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); + usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, + acm->ctrl_dma); acm_read_buffers_free(acm); usb_driver_release_interface(&acm_driver, intf == acm->control ? acm->data : acm->control); - if (!acm->used) { + if (acm->port.count == 0) { acm_tty_unregister(acm); mutex_unlock(&open_mutex); return; } mutex_unlock(&open_mutex); - - if (acm->tty) - tty_hangup(acm->tty); + tty = tty_port_tty_get(&acm->port); + if (tty) { + tty_hangup(tty); + tty_kref_put(tty); + } } #ifdef CONFIG_PM @@ -1297,7 +1361,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) */ mutex_lock(&acm->mutex); - if (acm->used) + if (acm->port.count) stop_data_traffic(acm); mutex_unlock(&acm->mutex); @@ -1319,7 +1383,7 @@ static int acm_resume(struct usb_interface *intf) return 0; mutex_lock(&acm->mutex); - if (acm->used) { + if (acm->port.count) { rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO); if (rv < 0) goto err_out; @@ -1398,7 +1462,7 @@ static struct usb_device_id acm_ids[] = { { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_GSM) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, - USB_CDC_ACM_PROTO_AT_3G ) }, + USB_CDC_ACM_PROTO_AT_3G) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_CDMA) }, @@ -1406,7 +1470,7 @@ static struct usb_device_id acm_ids[] = { { } }; -MODULE_DEVICE_TABLE (usb, acm_ids); +MODULE_DEVICE_TABLE(usb, acm_ids); static struct usb_driver acm_driver = { .name = "cdc_acm", @@ -1429,6 +1493,7 @@ static struct usb_driver acm_driver = { static const struct tty_operations acm_ops = { .open = acm_tty_open, .close = acm_tty_close, + .hangup = acm_tty_hangup, .write = acm_tty_write, .write_room = acm_tty_write_room, .ioctl = acm_tty_ioctl, @@ -1460,7 +1525,8 @@ static int __init acm_init(void) acm_tty_driver->subtype = SERIAL_TYPE_NORMAL, acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; acm_tty_driver->init_termios = tty_std_termios; - acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; + acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | + HUPCL | CLOCAL; tty_set_operations(acm_tty_driver, &acm_ops); retval = tty_register_driver(acm_tty_driver); @@ -1492,7 +1558,7 @@ static void __exit acm_exit(void) module_init(acm_init); module_exit(acm_exit); -MODULE_AUTHOR( DRIVER_AUTHOR ); -MODULE_DESCRIPTION( DRIVER_DESC ); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR); diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 1f95e7aa1b66..4c3856420add 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -89,8 +89,8 @@ struct acm { struct usb_device *dev; /* the corresponding usb device */ struct usb_interface *control; /* control interface */ struct usb_interface *data; /* data interface */ - struct tty_struct *tty; /* the corresponding tty */ - struct urb *ctrlurb; /* urbs */ + struct tty_port port; /* our tty port data */ + struct urb *ctrlurb; /* urbs */ u8 *ctrl_buffer; /* buffers of urbs */ dma_addr_t ctrl_dma; /* dma handles of buffers */ u8 *country_codes; /* country codes from device */ @@ -120,7 +120,6 @@ struct acm { unsigned int ctrlout; /* output control lines (DTR, RTS) */ unsigned int writesize; /* max packet size for the output bulk endpoint */ unsigned int readsize,ctrlsize; /* buffer sizes for freeing */ - unsigned int used; /* someone has this acm's device open */ unsigned int minor; /* acm minor number */ unsigned char throttle; /* throttled by tty layer */ unsigned char clocal; /* termios CLOCAL */ diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c index b7eacad4d48c..2bfd6dd85b5a 100644 --- a/drivers/usb/serial/belkin_sa.c +++ b/drivers/usb/serial/belkin_sa.c @@ -93,8 +93,7 @@ static int belkin_sa_startup(struct usb_serial *serial); static void belkin_sa_shutdown(struct usb_serial *serial); static int belkin_sa_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void belkin_sa_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void belkin_sa_close(struct usb_serial_port *port); static void belkin_sa_read_int_callback(struct urb *urb); static void belkin_sa_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios * old); @@ -244,8 +243,7 @@ exit: } /* belkin_sa_open */ -static void belkin_sa_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void belkin_sa_close(struct usb_serial_port *port) { dbg("%s port %d", __func__, port->number); diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index ab4cc277aa65..2830766f5b39 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -262,32 +262,40 @@ error: kfree(priv); return r; } -static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static int ch341_carrier_raised(struct usb_serial_port *port) +{ + struct ch341_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & CH341_BIT_DCD) + return 1; + return 0; +} + +static void ch341_dtr_rts(struct usb_serial_port *port, int on) { struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; dbg("%s - port %d", __func__, port->number); + /* drop DTR and RTS */ + spin_lock_irqsave(&priv->lock, flags); + if (on) + priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR; + else + priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR); + spin_unlock_irqrestore(&priv->lock, flags); + ch341_set_handshake(port->serial->dev, priv->line_control); + wake_up_interruptible(&priv->delta_msr_wait); +} + +static void ch341_close(struct usb_serial_port *port) +{ + dbg("%s - port %d", __func__, port->number); /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - ch341_set_handshake(port->serial->dev, 0); - } - } - wake_up_interruptible(&priv->delta_msr_wait); } @@ -302,7 +310,6 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port, dbg("ch341_open()"); priv->baud_rate = DEFAULT_BAUD_RATE; - priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; r = ch341_configure(serial->dev, priv); if (r) @@ -322,7 +329,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port, if (r) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, r); - ch341_close(tty, port, NULL); + ch341_close(port); return -EPROTO; } @@ -343,9 +350,6 @@ static void ch341_set_termios(struct tty_struct *tty, dbg("ch341_set_termios()"); - if (!tty || !tty->termios) - return; - baud_rate = tty_get_baud_rate(tty); priv->baud_rate = baud_rate; @@ -568,6 +572,8 @@ static struct usb_serial_driver ch341_device = { .usb_driver = &ch341_driver, .num_ports = 1, .open = ch341_open, + .dtr_rts = ch341_dtr_rts, + .carrier_raised = ch341_carrier_raised, .close = ch341_close, .ioctl = ch341_ioctl, .set_termios = ch341_set_termios, diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 19e24045b137..247b61bfb7f4 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -169,7 +169,9 @@ static int usb_console_setup(struct console *co, char *options) kfree(tty); } } - + /* So we know not to kill the hardware on a hangup on this + port. We have also bumped the use count by one so it won't go + idle */ port->console = 1; retval = 0; @@ -182,7 +184,7 @@ free_tty: kfree(tty); reset_open_count: port->port.count = 0; -goto out; + goto out; } static void usb_console_write(struct console *co, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index e8d5133ce9c8..16a154d3b2fe 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1,5 +1,5 @@ /* - * Silicon Laboratories CP2101/CP2102 USB to RS232 serial adaptor driver + * Silicon Laboratories CP210x USB to RS232 serial adaptor driver * * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk) * @@ -27,44 +27,46 @@ /* * Version Information */ -#define DRIVER_VERSION "v0.08" -#define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver" +#define DRIVER_VERSION "v0.09" +#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver" /* * Function Prototypes */ -static int cp2101_open(struct tty_struct *, struct usb_serial_port *, +static int cp210x_open(struct tty_struct *, struct usb_serial_port *, struct file *); -static void cp2101_cleanup(struct usb_serial_port *); -static void cp2101_close(struct tty_struct *, struct usb_serial_port *, - struct file*); -static void cp2101_get_termios(struct tty_struct *, +static void cp210x_cleanup(struct usb_serial_port *); +static void cp210x_close(struct usb_serial_port *); +static void cp210x_get_termios(struct tty_struct *, struct usb_serial_port *port); -static void cp2101_get_termios_port(struct usb_serial_port *port, +static void cp210x_get_termios_port(struct usb_serial_port *port, unsigned int *cflagp, unsigned int *baudp); -static void cp2101_set_termios(struct tty_struct *, struct usb_serial_port *, +static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *, struct ktermios*); -static int cp2101_tiocmget(struct tty_struct *, struct file *); -static int cp2101_tiocmset(struct tty_struct *, struct file *, +static int cp210x_tiocmget(struct tty_struct *, struct file *); +static int cp210x_tiocmset(struct tty_struct *, struct file *, unsigned int, unsigned int); -static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *, +static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *, unsigned int, unsigned int); -static void cp2101_break_ctl(struct tty_struct *, int); -static int cp2101_startup(struct usb_serial *); -static void cp2101_shutdown(struct usb_serial *); +static void cp210x_break_ctl(struct tty_struct *, int); +static int cp210x_startup(struct usb_serial *); +static void cp210x_shutdown(struct usb_serial *); static int debug; static struct usb_device_id id_table [] = { { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ + { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ + { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ + { USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */ { USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */ { USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */ { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ @@ -85,10 +87,12 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */ { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */ { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */ + { USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */ { USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */ { USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */ { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */ { USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */ + { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ @@ -99,7 +103,9 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */ { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ + { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ + { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ @@ -108,53 +114,70 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE(usb, id_table); -static struct usb_driver cp2101_driver = { - .name = "cp2101", +static struct usb_driver cp210x_driver = { + .name = "cp210x", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, .no_dynamic_id = 1, }; -static struct usb_serial_driver cp2101_device = { +static struct usb_serial_driver cp210x_device = { .driver = { .owner = THIS_MODULE, - .name = "cp2101", + .name = "cp210x", }, - .usb_driver = &cp2101_driver, + .usb_driver = &cp210x_driver, .id_table = id_table, .num_ports = 1, - .open = cp2101_open, - .close = cp2101_close, - .break_ctl = cp2101_break_ctl, - .set_termios = cp2101_set_termios, - .tiocmget = cp2101_tiocmget, - .tiocmset = cp2101_tiocmset, - .attach = cp2101_startup, - .shutdown = cp2101_shutdown, + .open = cp210x_open, + .close = cp210x_close, + .break_ctl = cp210x_break_ctl, + .set_termios = cp210x_set_termios, + .tiocmget = cp210x_tiocmget, + .tiocmset = cp210x_tiocmset, + .attach = cp210x_startup, + .shutdown = cp210x_shutdown, }; /* Config request types */ #define REQTYPE_HOST_TO_DEVICE 0x41 #define REQTYPE_DEVICE_TO_HOST 0xc1 -/* Config SET requests. To GET, add 1 to the request number */ -#define CP2101_UART 0x00 /* Enable / Disable */ -#define CP2101_BAUDRATE 0x01 /* (BAUD_RATE_GEN_FREQ / baudrate) */ -#define CP2101_BITS 0x03 /* 0x(0)(databits)(parity)(stopbits) */ -#define CP2101_BREAK 0x05 /* On / Off */ -#define CP2101_CONTROL 0x07 /* Flow control line states */ -#define CP2101_MODEMCTL 0x13 /* Modem controls */ -#define CP2101_CONFIG_6 0x19 /* 6 bytes of config data ??? */ - -/* CP2101_UART */ +/* Config request codes */ +#define CP210X_IFC_ENABLE 0x00 +#define CP210X_SET_BAUDDIV 0x01 +#define CP210X_GET_BAUDDIV 0x02 +#define CP210X_SET_LINE_CTL 0x03 +#define CP210X_GET_LINE_CTL 0x04 +#define CP210X_SET_BREAK 0x05 +#define CP210X_IMM_CHAR 0x06 +#define CP210X_SET_MHS 0x07 +#define CP210X_GET_MDMSTS 0x08 +#define CP210X_SET_XON 0x09 +#define CP210X_SET_XOFF 0x0A +#define CP210X_SET_EVENTMASK 0x0B +#define CP210X_GET_EVENTMASK 0x0C +#define CP210X_SET_CHAR 0x0D +#define CP210X_GET_CHARS 0x0E +#define CP210X_GET_PROPS 0x0F +#define CP210X_GET_COMM_STATUS 0x10 +#define CP210X_RESET 0x11 +#define CP210X_PURGE 0x12 +#define CP210X_SET_FLOW 0x13 +#define CP210X_GET_FLOW 0x14 +#define CP210X_EMBED_EVENTS 0x15 +#define CP210X_GET_EVENTSTATE 0x16 +#define CP210X_SET_CHARS 0x19 + +/* CP210X_IFC_ENABLE */ #define UART_ENABLE 0x0001 #define UART_DISABLE 0x0000 -/* CP2101_BAUDRATE */ +/* CP210X_(SET|GET)_BAUDDIV */ #define BAUD_RATE_GEN_FREQ 0x384000 -/* CP2101_BITS */ +/* CP210X_(SET|GET)_LINE_CTL */ #define BITS_DATA_MASK 0X0f00 #define BITS_DATA_5 0X0500 #define BITS_DATA_6 0X0600 @@ -174,11 +197,11 @@ static struct usb_serial_driver cp2101_device = { #define BITS_STOP_1_5 0x0001 #define BITS_STOP_2 0x0002 -/* CP2101_BREAK */ +/* CP210X_SET_BREAK */ #define BREAK_ON 0x0000 #define BREAK_OFF 0x0001 -/* CP2101_CONTROL */ +/* CP210X_(SET_MHS|GET_MDMSTS) */ #define CONTROL_DTR 0x0001 #define CONTROL_RTS 0x0002 #define CONTROL_CTS 0x0010 @@ -189,13 +212,13 @@ static struct usb_serial_driver cp2101_device = { #define CONTROL_WRITE_RTS 0x0200 /* - * cp2101_get_config - * Reads from the CP2101 configuration registers + * cp210x_get_config + * Reads from the CP210x configuration registers * 'size' is specified in bytes. * 'data' is a pointer to a pre-allocated array of integers large * enough to hold 'size' bytes (with 4 bytes to each integer) */ -static int cp2101_get_config(struct usb_serial_port *port, u8 request, +static int cp210x_get_config(struct usb_serial_port *port, u8 request, unsigned int *data, int size) { struct usb_serial *serial = port->serial; @@ -211,9 +234,6 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request, return -ENOMEM; } - /* For get requests, the request number must be incremented */ - request++; - /* Issue the request, attempting to read 'size' bytes */ result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), request, REQTYPE_DEVICE_TO_HOST, 0x0000, @@ -236,12 +256,12 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request, } /* - * cp2101_set_config - * Writes to the CP2101 configuration registers + * cp210x_set_config + * Writes to the CP210x configuration registers * Values less than 16 bits wide are sent directly * 'size' is specified in bytes. */ -static int cp2101_set_config(struct usb_serial_port *port, u8 request, +static int cp210x_set_config(struct usb_serial_port *port, u8 request, unsigned int *data, int size) { struct usb_serial *serial = port->serial; @@ -292,21 +312,21 @@ static int cp2101_set_config(struct usb_serial_port *port, u8 request, } /* - * cp2101_set_config_single - * Convenience function for calling cp2101_set_config on single data values + * cp210x_set_config_single + * Convenience function for calling cp210x_set_config on single data values * without requiring an integer pointer */ -static inline int cp2101_set_config_single(struct usb_serial_port *port, +static inline int cp210x_set_config_single(struct usb_serial_port *port, u8 request, unsigned int data) { - return cp2101_set_config(port, request, &data, 2); + return cp210x_set_config(port, request, &data, 2); } /* - * cp2101_quantise_baudrate + * cp210x_quantise_baudrate * Quantises the baud rate as per AN205 Table 1 */ -static unsigned int cp2101_quantise_baudrate(unsigned int baud) { +static unsigned int cp210x_quantise_baudrate(unsigned int baud) { if (baud <= 56) baud = 0; else if (baud <= 300) baud = 300; else if (baud <= 600) baud = 600; @@ -343,7 +363,7 @@ static unsigned int cp2101_quantise_baudrate(unsigned int baud) { return baud; } -static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port, +static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) { struct usb_serial *serial = port->serial; @@ -351,7 +371,7 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port, dbg("%s - port %d", __func__, port->number); - if (cp2101_set_config_single(port, CP2101_UART, UART_ENABLE)) { + if (cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) { dev_err(&port->dev, "%s - Unable to enable UART\n", __func__); return -EPROTO; @@ -373,17 +393,17 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port, } /* Configure the termios structure */ - cp2101_get_termios(tty, port); + cp210x_get_termios(tty, port); /* Set the DTR and RTS pins low */ - cp2101_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data + cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data : port, NULL, TIOCM_DTR | TIOCM_RTS, 0); return 0; } -static void cp2101_cleanup(struct usb_serial_port *port) +static void cp210x_cleanup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; @@ -398,8 +418,7 @@ static void cp2101_cleanup(struct usb_serial_port *port) } } -static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void cp210x_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); @@ -410,23 +429,23 @@ static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port, mutex_lock(&port->serial->disc_mutex); if (!port->serial->disconnected) - cp2101_set_config_single(port, CP2101_UART, UART_DISABLE); + cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE); mutex_unlock(&port->serial->disc_mutex); } /* - * cp2101_get_termios + * cp210x_get_termios * Reads the baud rate, data bits, parity, stop bits and flow control mode * from the device, corrects any unsupported values, and configures the * termios structure to reflect the state of the device */ -static void cp2101_get_termios(struct tty_struct *tty, +static void cp210x_get_termios(struct tty_struct *tty, struct usb_serial_port *port) { unsigned int baud; if (tty) { - cp2101_get_termios_port(tty->driver_data, + cp210x_get_termios_port(tty->driver_data, &tty->termios->c_cflag, &baud); tty_encode_baud_rate(tty, baud, baud); } @@ -434,15 +453,15 @@ static void cp2101_get_termios(struct tty_struct *tty, else { unsigned int cflag; cflag = 0; - cp2101_get_termios_port(port, &cflag, &baud); + cp210x_get_termios_port(port, &cflag, &baud); } } /* - * cp2101_get_termios_port - * This is the heart of cp2101_get_termios which always uses a &usb_serial_port. + * cp210x_get_termios_port + * This is the heart of cp210x_get_termios which always uses a &usb_serial_port. */ -static void cp2101_get_termios_port(struct usb_serial_port *port, +static void cp210x_get_termios_port(struct usb_serial_port *port, unsigned int *cflagp, unsigned int *baudp) { unsigned int cflag, modem_ctl[4]; @@ -451,17 +470,17 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, dbg("%s - port %d", __func__, port->number); - cp2101_get_config(port, CP2101_BAUDRATE, &baud, 2); + cp210x_get_config(port, CP210X_GET_BAUDDIV, &baud, 2); /* Convert to baudrate */ if (baud) - baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud); + baud = cp210x_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud); dbg("%s - baud rate = %d", __func__, baud); *baudp = baud; cflag = *cflagp; - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); cflag &= ~CSIZE; switch (bits & BITS_DATA_MASK) { case BITS_DATA_5: @@ -486,14 +505,14 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, cflag |= CS8; bits &= ~BITS_DATA_MASK; bits |= BITS_DATA_8; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; default: dbg("%s - Unknown number of data bits, using 8", __func__); cflag |= CS8; bits &= ~BITS_DATA_MASK; bits |= BITS_DATA_8; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; } @@ -516,20 +535,20 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, __func__); cflag &= ~PARENB; bits &= ~BITS_PARITY_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; case BITS_PARITY_SPACE: dbg("%s - parity = SPACE (not supported, disabling parity)", __func__); cflag &= ~PARENB; bits &= ~BITS_PARITY_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; default: dbg("%s - Unknown parity mode, disabling parity", __func__); cflag &= ~PARENB; bits &= ~BITS_PARITY_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; } @@ -542,7 +561,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)", __func__); bits &= ~BITS_STOP_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; case BITS_STOP_2: dbg("%s - stop bits = 2", __func__); @@ -552,11 +571,11 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, dbg("%s - Unknown number of stop bits, using 1 stop bit", __func__); bits &= ~BITS_STOP_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; } - cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16); + cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16); if (modem_ctl[0] & 0x0008) { dbg("%s - flow control = CRTSCTS", __func__); cflag |= CRTSCTS; @@ -568,7 +587,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, *cflagp = cflag; } -static void cp2101_set_termios(struct tty_struct *tty, +static void cp210x_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { unsigned int cflag, old_cflag; @@ -583,13 +602,13 @@ static void cp2101_set_termios(struct tty_struct *tty, tty->termios->c_cflag &= ~CMSPAR; cflag = tty->termios->c_cflag; old_cflag = old_termios->c_cflag; - baud = cp2101_quantise_baudrate(tty_get_baud_rate(tty)); + baud = cp210x_quantise_baudrate(tty_get_baud_rate(tty)); /* If the baud rate is to be updated*/ if (baud != tty_termios_baud_rate(old_termios) && baud != 0) { dbg("%s - Setting baud rate to %d baud", __func__, baud); - if (cp2101_set_config_single(port, CP2101_BAUDRATE, + if (cp210x_set_config_single(port, CP210X_SET_BAUDDIV, ((BAUD_RATE_GEN_FREQ + baud/2) / baud))) { dbg("Baud rate requested not supported by device\n"); baud = tty_termios_baud_rate(old_termios); @@ -600,7 +619,7 @@ static void cp2101_set_termios(struct tty_struct *tty, /* If the number of data bits is to be updated */ if ((cflag & CSIZE) != (old_cflag & CSIZE)) { - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); bits &= ~BITS_DATA_MASK; switch (cflag & CSIZE) { case CS5: @@ -624,19 +643,19 @@ static void cp2101_set_termios(struct tty_struct *tty, dbg("%s - data bits = 9", __func__); break;*/ default: - dbg("cp2101 driver does not " + dbg("cp210x driver does not " "support the number of bits requested," " using 8 bit mode\n"); bits |= BITS_DATA_8; break; } - if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) + if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2)) dbg("Number of data bits requested " "not supported by device\n"); } if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) { - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); bits &= ~BITS_PARITY_MASK; if (cflag & PARENB) { if (cflag & PARODD) { @@ -647,13 +666,13 @@ static void cp2101_set_termios(struct tty_struct *tty, dbg("%s - parity = EVEN", __func__); } } - if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) + if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2)) dbg("Parity mode not supported " "by device\n"); } if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) { - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); bits &= ~BITS_STOP_MASK; if (cflag & CSTOPB) { bits |= BITS_STOP_2; @@ -662,13 +681,13 @@ static void cp2101_set_termios(struct tty_struct *tty, bits |= BITS_STOP_1; dbg("%s - stop bits = 1", __func__); } - if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) + if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2)) dbg("Number of stop bits requested " "not supported by device\n"); } if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) { - cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16); + cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16); dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x", __func__, modem_ctl[0], modem_ctl[1], modem_ctl[2], modem_ctl[3]); @@ -688,19 +707,19 @@ static void cp2101_set_termios(struct tty_struct *tty, dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x", __func__, modem_ctl[0], modem_ctl[1], modem_ctl[2], modem_ctl[3]); - cp2101_set_config(port, CP2101_MODEMCTL, modem_ctl, 16); + cp210x_set_config(port, CP210X_SET_FLOW, modem_ctl, 16); } } -static int cp2101_tiocmset (struct tty_struct *tty, struct file *file, +static int cp210x_tiocmset (struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; - return cp2101_tiocmset_port(port, file, set, clear); + return cp210x_tiocmset_port(port, file, set, clear); } -static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file, +static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *file, unsigned int set, unsigned int clear) { unsigned int control = 0; @@ -726,10 +745,10 @@ static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file, dbg("%s - control = 0x%.4x", __func__, control); - return cp2101_set_config(port, CP2101_CONTROL, &control, 2); + return cp210x_set_config(port, CP210X_SET_MHS, &control, 2); } -static int cp2101_tiocmget (struct tty_struct *tty, struct file *file) +static int cp210x_tiocmget (struct tty_struct *tty, struct file *file) { struct usb_serial_port *port = tty->driver_data; unsigned int control; @@ -737,7 +756,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file) dbg("%s - port %d", __func__, port->number); - cp2101_get_config(port, CP2101_CONTROL, &control, 1); + cp210x_get_config(port, CP210X_GET_MDMSTS, &control, 1); result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0) |((control & CONTROL_RTS) ? TIOCM_RTS : 0) @@ -751,7 +770,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file) return result; } -static void cp2101_break_ctl (struct tty_struct *tty, int break_state) +static void cp210x_break_ctl (struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; unsigned int state; @@ -763,17 +782,17 @@ static void cp2101_break_ctl (struct tty_struct *tty, int break_state) state = BREAK_ON; dbg("%s - turning break %s", __func__, state == BREAK_OFF ? "off" : "on"); - cp2101_set_config(port, CP2101_BREAK, &state, 2); + cp210x_set_config(port, CP210X_SET_BREAK, &state, 2); } -static int cp2101_startup(struct usb_serial *serial) +static int cp210x_startup(struct usb_serial *serial) { - /* CP2101 buffers behave strangely unless device is reset */ + /* cp210x buffers behave strangely unless device is reset */ usb_reset_device(serial->dev); return 0; } -static void cp2101_shutdown(struct usb_serial *serial) +static void cp210x_shutdown(struct usb_serial *serial) { int i; @@ -781,21 +800,21 @@ static void cp2101_shutdown(struct usb_serial *serial) /* Stop reads and writes on all ports */ for (i = 0; i < serial->num_ports; ++i) - cp2101_cleanup(serial->port[i]); + cp210x_cleanup(serial->port[i]); } -static int __init cp2101_init(void) +static int __init cp210x_init(void) { int retval; - retval = usb_serial_register(&cp2101_device); + retval = usb_serial_register(&cp210x_device); if (retval) return retval; /* Failed to register */ - retval = usb_register(&cp2101_driver); + retval = usb_register(&cp210x_driver); if (retval) { /* Failed to register */ - usb_serial_deregister(&cp2101_device); + usb_serial_deregister(&cp210x_device); return retval; } @@ -805,14 +824,14 @@ static int __init cp2101_init(void) return 0; } -static void __exit cp2101_exit(void) +static void __exit cp210x_exit(void) { - usb_deregister(&cp2101_driver); - usb_serial_deregister(&cp2101_device); + usb_deregister(&cp210x_driver); + usb_serial_deregister(&cp210x_device); } -module_init(cp2101_init); -module_exit(cp2101_exit); +module_init(cp210x_init); +module_exit(cp210x_exit); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index dd501bb63ed6..933ba913e66c 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -61,8 +61,7 @@ static int cyberjack_startup(struct usb_serial *serial); static void cyberjack_shutdown(struct usb_serial *serial); static int cyberjack_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void cyberjack_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void cyberjack_close(struct usb_serial_port *port); static int cyberjack_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int cyberjack_write_room(struct tty_struct *tty); @@ -185,8 +184,7 @@ static int cyberjack_open(struct tty_struct *tty, return result; } -static void cyberjack_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void cyberjack_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index e568710b263f..669f93848539 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -174,8 +174,8 @@ static int cypress_ca42v2_startup(struct usb_serial *serial); static void cypress_shutdown(struct usb_serial *serial); static int cypress_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void cypress_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void cypress_close(struct usb_serial_port *port); +static void cypress_dtr_rts(struct usb_serial_port *port, int on); static int cypress_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void cypress_send(struct usb_serial_port *port); @@ -218,6 +218,7 @@ static struct usb_serial_driver cypress_earthmate_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -244,6 +245,7 @@ static struct usb_serial_driver cypress_hidcom_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -270,6 +272,7 @@ static struct usb_serial_driver cypress_ca42v2_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -656,11 +659,7 @@ static int cypress_open(struct tty_struct *tty, priv->rx_flags = 0; spin_unlock_irqrestore(&priv->lock, flags); - /* raise both lines and set termios */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = CONTROL_DTR | CONTROL_RTS; - priv->cmd_ctrl = 1; - spin_unlock_irqrestore(&priv->lock, flags); + /* Set termios */ result = cypress_write(tty, port, NULL, 0); if (result) { @@ -694,76 +693,42 @@ static int cypress_open(struct tty_struct *tty, __func__, result); cypress_set_dead(port); } - + port->port.drain_delay = 256; return result; } /* cypress_open */ +static void cypress_dtr_rts(struct usb_serial_port *port, int on) +{ + struct cypress_private *priv = usb_get_serial_port_data(port); + /* drop dtr and rts */ + priv = usb_get_serial_port_data(port); + spin_lock_irq(&priv->lock); + if (on == 0) + priv->line_control = 0; + else + priv->line_control = CONTROL_DTR | CONTROL_RTS; + priv->cmd_ctrl = 1; + spin_unlock_irq(&priv->lock); + cypress_write(NULL, port, NULL, 0); +} -static void cypress_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void cypress_close(struct usb_serial_port *port) { struct cypress_private *priv = usb_get_serial_port_data(port); - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from buffer */ - spin_lock_irq(&priv->lock); - timeout = CYPRESS_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (cypress_buf_data_avail(priv->buf) == 0 - || timeout == 0 || signal_pending(current) - /* without mutex, allowed due to harmless failure mode */ - || port->serial->disconnected) - break; - spin_unlock_irq(&priv->lock); - timeout = schedule_timeout(timeout); - spin_lock_irq(&priv->lock); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - /* clear out any remaining data in the buffer */ - cypress_buf_clear(priv->buf); - spin_unlock_irq(&priv->lock); - /* writing is potentially harmful, lock must be taken */ mutex_lock(&port->serial->disc_mutex); if (port->serial->disconnected) { mutex_unlock(&port->serial->disc_mutex); return; } - /* wait for characters to drain from device */ - if (tty) { - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ * 2560) / bps, HZ / 10); - else - timeout = 2 * HZ; - schedule_timeout_interruptible(timeout); - } - + cypress_buf_clear(priv->buf); dbg("%s - stopping urbs", __func__); usb_kill_urb(port->interrupt_in_urb); usb_kill_urb(port->interrupt_out_urb); - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop dtr and rts */ - priv = usb_get_serial_port_data(port); - spin_lock_irq(&priv->lock); - priv->line_control = 0; - priv->cmd_ctrl = 1; - spin_unlock_irq(&priv->lock); - cypress_write(tty, port, NULL, 0); - } - } if (stats) dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n", diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 38ba4ea8b6bf..30f5140eff03 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -422,7 +422,6 @@ struct digi_port { int dp_throttled; int dp_throttle_restart; wait_queue_head_t dp_flush_wait; - int dp_in_close; /* close in progress */ wait_queue_head_t dp_close_wait; /* wait queue for close */ struct work_struct dp_wakeup_work; struct usb_serial_port *dp_port; @@ -456,8 +455,9 @@ static int digi_write_room(struct tty_struct *tty); static int digi_chars_in_buffer(struct tty_struct *tty); static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void digi_close(struct usb_serial_port *port); +static int digi_carrier_raised(struct usb_serial_port *port); +static void digi_dtr_rts(struct usb_serial_port *port, int on); static int digi_startup_device(struct usb_serial *serial); static int digi_startup(struct usb_serial *serial); static void digi_shutdown(struct usb_serial *serial); @@ -510,6 +510,8 @@ static struct usb_serial_driver digi_acceleport_2_device = { .num_ports = 3, .open = digi_open, .close = digi_close, + .dtr_rts = digi_dtr_rts, + .carrier_raised = digi_carrier_raised, .write = digi_write, .write_room = digi_write_room, .write_bulk_callback = digi_write_bulk_callback, @@ -1328,6 +1330,19 @@ static int digi_chars_in_buffer(struct tty_struct *tty) } +static void digi_dtr_rts(struct usb_serial_port *port, int on) +{ + /* Adjust DTR and RTS */ + digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1); +} + +static int digi_carrier_raised(struct usb_serial_port *port) +{ + struct digi_port *priv = usb_get_serial_port_data(port); + if (priv->dp_modem_signals & TIOCM_CD) + return 1; + return 0; +} static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) @@ -1336,7 +1351,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, unsigned char buf[32]; struct digi_port *priv = usb_get_serial_port_data(port); struct ktermios not_termios; - unsigned long flags = 0; dbg("digi_open: TOP: port=%d, open_count=%d", priv->dp_port_num, port->port.count); @@ -1345,26 +1359,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, if (digi_startup_device(port->serial) != 0) return -ENXIO; - spin_lock_irqsave(&priv->dp_port_lock, flags); - - /* don't wait on a close in progress for non-blocking opens */ - if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) { - spin_unlock_irqrestore(&priv->dp_port_lock, flags); - return -EAGAIN; - } - - /* wait for a close in progress to finish */ - while (priv->dp_in_close) { - cond_wait_interruptible_timeout_irqrestore( - &priv->dp_close_wait, DIGI_RETRY_TIMEOUT, - &priv->dp_port_lock, flags); - if (signal_pending(current)) - return -EINTR; - spin_lock_irqsave(&priv->dp_port_lock, flags); - } - - spin_unlock_irqrestore(&priv->dp_port_lock, flags); - /* read modem signals automatically whenever they change */ buf[0] = DIGI_CMD_READ_INPUT_SIGNALS; buf[1] = priv->dp_port_num; @@ -1387,16 +1381,11 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, not_termios.c_iflag = ~tty->termios->c_iflag; digi_set_termios(tty, port, ¬_termios); } - - /* set DTR and RTS */ - digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1); - return 0; } -static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void digi_close(struct usb_serial_port *port) { DEFINE_WAIT(wait); int ret; @@ -1411,28 +1400,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, if (port->serial->disconnected) goto exit; - /* do cleanup only after final close on this port */ - spin_lock_irq(&priv->dp_port_lock); - priv->dp_in_close = 1; - spin_unlock_irq(&priv->dp_port_lock); - - /* tell line discipline to process only XON/XOFF */ - tty->closing = 1; - - /* wait for output to drain */ - if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) - tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT); - - /* flush driver and line discipline buffers */ - tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - if (port->serial->dev) { - /* wait for transmit idle */ - if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) - digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT); - /* drop DTR and RTS */ - digi_set_modem_signals(port, 0, 0); + /* FIXME: Transmit idle belongs in the wait_unti_sent path */ + digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT); /* disable input flow control */ buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL; @@ -1477,11 +1447,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, /* shutdown any outstanding bulk writes */ usb_kill_urb(port->write_urb); } - tty->closing = 0; exit: spin_lock_irq(&priv->dp_port_lock); priv->dp_write_urb_in_use = 0; - priv->dp_in_close = 0; wake_up_interruptible(&priv->dp_close_wait); spin_unlock_irq(&priv->dp_port_lock); mutex_unlock(&port->serial->disc_mutex); @@ -1560,7 +1528,6 @@ static int digi_startup(struct usb_serial *serial) priv->dp_throttled = 0; priv->dp_throttle_restart = 0; init_waitqueue_head(&priv->dp_flush_wait); - priv->dp_in_close = 0; init_waitqueue_head(&priv->dp_close_wait); INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock); priv->dp_port = serial->port[i]; diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c index c709ec474a80..2b141ccb0cd9 100644 --- a/drivers/usb/serial/empeg.c +++ b/drivers/usb/serial/empeg.c @@ -81,8 +81,7 @@ static int debug; /* function prototypes for an empeg-car player */ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void empeg_close(struct usb_serial_port *port); static int empeg_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); @@ -181,8 +180,7 @@ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port, } -static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void empeg_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d9fcdaedf389..683304d60615 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -89,6 +89,7 @@ struct ftdi_private { int force_rtscts; /* if non-zero, force RTS-CTS to always be enabled */ + unsigned int latency; /* latency setting in use */ spinlock_t tx_lock; /* spinlock for transmit state */ unsigned long tx_bytes; unsigned long tx_outstanding_bytes; @@ -719,8 +720,8 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port); static int ftdi_sio_port_remove(struct usb_serial_port *port); static int ftdi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void ftdi_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void ftdi_close(struct usb_serial_port *port); +static void ftdi_dtr_rts(struct usb_serial_port *port, int on); static int ftdi_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int ftdi_write_room(struct tty_struct *tty); @@ -758,6 +759,7 @@ static struct usb_serial_driver ftdi_sio_device = { .port_remove = ftdi_sio_port_remove, .open = ftdi_open, .close = ftdi_close, + .dtr_rts = ftdi_dtr_rts, .throttle = ftdi_throttle, .unthrottle = ftdi_unthrottle, .write = ftdi_write, @@ -1037,7 +1039,54 @@ static int change_speed(struct tty_struct *tty, struct usb_serial_port *port) return rv; } +static int write_latency_timer(struct usb_serial_port *port) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + struct usb_device *udev = port->serial->dev; + char buf[1]; + int rv = 0; + int l = priv->latency; + + if (priv->flags & ASYNC_LOW_LATENCY) + l = 1; + + dbg("%s: setting latency timer = %i", __func__, l); + + rv = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + FTDI_SIO_SET_LATENCY_TIMER_REQUEST, + FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, + l, priv->interface, + buf, 0, WDR_TIMEOUT); + + if (rv < 0) + dev_err(&port->dev, "Unable to write latency timer: %i\n", rv); + return rv; +} + +static int read_latency_timer(struct usb_serial_port *port) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + struct usb_device *udev = port->serial->dev; + unsigned short latency = 0; + int rv = 0; + + dbg("%s", __func__); + + rv = usb_control_msg(udev, + usb_rcvctrlpipe(udev, 0), + FTDI_SIO_GET_LATENCY_TIMER_REQUEST, + FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE, + 0, priv->interface, + (char *) &latency, 1, WDR_TIMEOUT); + + if (rv < 0) { + dev_err(&port->dev, "Unable to read latency timer: %i\n", rv); + return -EIO; + } + return latency; +} static int get_serial_info(struct usb_serial_port *port, struct serial_struct __user *retinfo) @@ -1097,6 +1146,7 @@ static int set_serial_info(struct tty_struct *tty, priv->custom_divisor = new_serial.custom_divisor; tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + write_latency_timer(port); check_and_exit: if ((old_priv.flags & ASYNC_SPD_MASK) != @@ -1192,27 +1242,13 @@ static ssize_t show_latency_timer(struct device *dev, { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); - struct usb_device *udev = port->serial->dev; - unsigned short latency = 0; - int rv = 0; - - - dbg("%s", __func__); - - rv = usb_control_msg(udev, - usb_rcvctrlpipe(udev, 0), - FTDI_SIO_GET_LATENCY_TIMER_REQUEST, - FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE, - 0, priv->interface, - (char *) &latency, 1, WDR_TIMEOUT); - - if (rv < 0) { - dev_err(dev, "Unable to read latency timer: %i\n", rv); - return -EIO; - } - return sprintf(buf, "%i\n", latency); + if (priv->flags & ASYNC_LOW_LATENCY) + return sprintf(buf, "1\n"); + else + return sprintf(buf, "%i\n", priv->latency); } + /* Write a new value of the latency timer, in units of milliseconds. */ static ssize_t store_latency_timer(struct device *dev, struct device_attribute *attr, const char *valbuf, @@ -1220,25 +1256,13 @@ static ssize_t store_latency_timer(struct device *dev, { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); - struct usb_device *udev = port->serial->dev; - char buf[1]; int v = simple_strtoul(valbuf, NULL, 10); int rv = 0; - dbg("%s: setting latency timer = %i", __func__, v); - - rv = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - FTDI_SIO_SET_LATENCY_TIMER_REQUEST, - FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, - v, priv->interface, - buf, 0, WDR_TIMEOUT); - - if (rv < 0) { - dev_err(dev, "Unable to write latency timer: %i\n", rv); + priv->latency = v; + rv = write_latency_timer(port); + if (rv < 0) return -EIO; - } - return count; } @@ -1392,6 +1416,7 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port) usb_set_serial_port_data(port, priv); ftdi_determine_type(port); + read_latency_timer(port); create_sysfs_attrs(port); return 0; } @@ -1514,6 +1539,8 @@ static int ftdi_open(struct tty_struct *tty, if (tty) tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + write_latency_timer(port); + /* No error checking for this (will get errors later anyway) */ /* See ftdi_sio.h for description of what is reset */ usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -1529,11 +1556,6 @@ static int ftdi_open(struct tty_struct *tty, if (tty) ftdi_set_termios(tty, port, tty->termios); - /* FIXME: Flow control might be enabled, so it should be checked - - we have no control of defaults! */ - /* Turn on RTS and DTR since we are not flow controlling by default */ - set_mctrl(port, TIOCM_DTR | TIOCM_RTS); - /* Not throttled */ spin_lock_irqsave(&priv->rx_lock, flags); priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED); @@ -1558,6 +1580,30 @@ static int ftdi_open(struct tty_struct *tty, } /* ftdi_open */ +static void ftdi_dtr_rts(struct usb_serial_port *port, int on) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + char buf[1]; + + mutex_lock(&port->serial->disc_mutex); + if (!port->serial->disconnected) { + /* Disable flow control */ + if (!on && usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + FTDI_SIO_SET_FLOW_CTRL_REQUEST, + FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, + 0, priv->interface, buf, 0, + WDR_TIMEOUT) < 0) { + dev_err(&port->dev, "error from flowcontrol urb\n"); + } + /* drop RTS and DTR */ + if (on) + set_mctrl(port, TIOCM_DTR | TIOCM_RTS); + else + clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); + } + mutex_unlock(&port->serial->disc_mutex); +} /* * usbserial:__serial_close only calls ftdi_close if the point is open @@ -1567,31 +1613,12 @@ static int ftdi_open(struct tty_struct *tty, * */ -static void ftdi_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ftdi_close(struct usb_serial_port *port) { /* ftdi_close */ - unsigned int c_cflag = tty->termios->c_cflag; struct ftdi_private *priv = usb_get_serial_port_data(port); - char buf[1]; dbg("%s", __func__); - mutex_lock(&port->serial->disc_mutex); - if (c_cflag & HUPCL && !port->serial->disconnected) { - /* Disable flow control */ - if (usb_control_msg(port->serial->dev, - usb_sndctrlpipe(port->serial->dev, 0), - FTDI_SIO_SET_FLOW_CTRL_REQUEST, - FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, - 0, priv->interface, buf, 0, - WDR_TIMEOUT) < 0) { - dev_err(&port->dev, "error from flowcontrol urb\n"); - } - - /* drop RTS and DTR */ - clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); - } /* Note change no line if hupcl is off */ - mutex_unlock(&port->serial->disc_mutex); /* cancel any scheduled reading */ cancel_delayed_work_sync(&priv->rx_work); diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 586d30ff450b..ee25a3fe3b09 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -993,8 +993,7 @@ static int garmin_open(struct tty_struct *tty, } -static void garmin_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void garmin_close(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct garmin_data *garmin_data_p = usb_get_serial_port_data(port); diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 4cec9906ccf3..be82ea956720 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -184,8 +184,7 @@ int usb_serial_generic_resume(struct usb_serial *serial) } EXPORT_SYMBOL_GPL(usb_serial_generic_resume); -void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +void usb_serial_generic_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); generic_cleanup(port); diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index fb4a73d090f6..53ef5996e33d 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -207,8 +207,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb); /* function prototypes for the usbserial callbacks */ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void edge_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void edge_close(struct usb_serial_port *port); static int edge_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int edge_write_room(struct tty_struct *tty); @@ -965,7 +964,7 @@ static int edge_open(struct tty_struct *tty, if (!edge_port->txfifo.fifo) { dbg("%s - no memory", __func__); - edge_close(tty, port, filp); + edge_close(port); return -ENOMEM; } @@ -975,7 +974,7 @@ static int edge_open(struct tty_struct *tty, if (!edge_port->write_urb) { dbg("%s - no memory", __func__); - edge_close(tty, port, filp); + edge_close(port); return -ENOMEM; } @@ -1099,8 +1098,7 @@ static void block_until_tx_empty(struct edgeport_port *edge_port) * edge_close * this function is called by the tty driver when a port is closed *****************************************************************************/ -static void edge_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void edge_close(struct usb_serial_port *port) { struct edgeport_serial *edge_serial; struct edgeport_port *edge_port; diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 513b25e044c1..eabf20eeb370 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2009,8 +2009,7 @@ release_es_lock: return status; } -static void edge_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void edge_close(struct usb_serial_port *port) { struct edgeport_serial *edge_serial; struct edgeport_port *edge_port; diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index cd62825a9ac3..c610a99fa477 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -76,8 +76,7 @@ static int initial_wait; /* Function prototypes for an ipaq */ static int ipaq_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void ipaq_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void ipaq_close(struct usb_serial_port *port); static int ipaq_calc_num_ports(struct usb_serial *serial); static int ipaq_startup(struct usb_serial *serial); static void ipaq_shutdown(struct usb_serial *serial); @@ -714,8 +713,7 @@ error: } -static void ipaq_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ipaq_close(struct usb_serial_port *port) { struct ipaq_private *priv = usb_get_serial_port_data(port); diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c index da2a2b46644a..29ad038b9c8d 100644 --- a/drivers/usb/serial/ipw.c +++ b/drivers/usb/serial/ipw.c @@ -302,23 +302,17 @@ static int ipw_open(struct tty_struct *tty, return 0; } -static void ipw_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ipw_dtr_rts(struct usb_serial_port *port, int on) { struct usb_device *dev = port->serial->dev; int result; - if (tty_hung_up_p(filp)) { - dbg("%s: tty_hung_up_p ...", __func__); - return; - } - /*--1: drop the dtr */ dbg("%s:dropping dtr", __func__); result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, - IPW_PIN_CLRDTR, + on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR, 0, NULL, 0, @@ -332,7 +326,7 @@ static void ipw_close(struct tty_struct *tty, result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, - IPW_PIN_CLRRTS, + on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS, 0, NULL, 0, @@ -340,7 +334,12 @@ static void ipw_close(struct tty_struct *tty, if (result < 0) dev_err(&port->dev, "dropping rts failed (error = %d)\n", result); +} +static void ipw_close(struct usb_serial_port *port) +{ + struct usb_device *dev = port->serial->dev; + int result; /*--3: purge */ dbg("%s:sending purge", __func__); @@ -461,6 +460,7 @@ static struct usb_serial_driver ipw_device = { .num_ports = 1, .open = ipw_open, .close = ipw_close, + .dtr_rts = ipw_dtr_rts, .port_probe = ipw_probe, .port_remove = ipw_disconnect, .write = ipw_write, diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 4e2cda93da59..66009b6b763a 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -88,8 +88,7 @@ static int xbof = -1; static int ir_startup (struct usb_serial *serial); static int ir_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filep); -static void ir_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filep); +static void ir_close(struct usb_serial_port *port); static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void ir_write_bulk_callback (struct urb *urb); @@ -346,8 +345,7 @@ static int ir_open(struct tty_struct *tty, return result; } -static void ir_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file * filp) +static void ir_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 4473d442b2aa..76a3cc327bb9 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -40,7 +40,7 @@ static int debug; /* * Version Information */ -#define DRIVER_VERSION "v0.5" +#define DRIVER_VERSION "v0.10" #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver" static struct usb_device_id id_table[] = { @@ -70,7 +70,6 @@ static void read_rxcmd_callback(struct urb *urb); struct iuu_private { spinlock_t lock; /* store irq state */ wait_queue_head_t delta_msr_wait; - u8 line_control; u8 line_status; u8 termios_initialized; int tiostatus; /* store IUART SIGNAL for tiocmget call */ @@ -651,32 +650,33 @@ static int iuu_bulk_write(struct usb_serial_port *port) unsigned long flags; int result; int i; + int buf_len; char *buf_ptr = port->write_urb->transfer_buffer; dbg("%s - enter", __func__); + spin_lock_irqsave(&priv->lock, flags); *buf_ptr++ = IUU_UART_ESC; *buf_ptr++ = IUU_UART_TX; *buf_ptr++ = priv->writelen; - memcpy(buf_ptr, priv->writebuf, - priv->writelen); + memcpy(buf_ptr, priv->writebuf, priv->writelen); + buf_len = priv->writelen; + priv->writelen = 0; + spin_unlock_irqrestore(&priv->lock, flags); if (debug == 1) { - for (i = 0; i < priv->writelen; i++) + for (i = 0; i < buf_len; i++) sprintf(priv->dbgbuf + i*2 , "%02X", priv->writebuf[i]); - priv->dbgbuf[priv->writelen+i*2] = 0; + priv->dbgbuf[buf_len+i*2] = 0; dbg("%s - writing %i chars : %s", __func__, - priv->writelen, priv->dbgbuf); + buf_len, priv->dbgbuf); } usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), - port->write_urb->transfer_buffer, priv->writelen + 3, + port->write_urb->transfer_buffer, buf_len + 3, iuu_rxcmd, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); - spin_lock_irqsave(&priv->lock, flags); - priv->writelen = 0; - spin_unlock_irqrestore(&priv->lock, flags); usb_serial_port_softint(port); return result; } @@ -770,14 +770,10 @@ static int iuu_uart_write(struct tty_struct *tty, struct usb_serial_port *port, return -ENOMEM; spin_lock_irqsave(&priv->lock, flags); - if (priv->writelen > 0) { - /* buffer already filled but not commited */ - spin_unlock_irqrestore(&priv->lock, flags); - return 0; - } + /* fill the buffer */ - memcpy(priv->writebuf, buf, count); - priv->writelen = count; + memcpy(priv->writebuf + priv->writelen, buf, count); + priv->writelen += count; spin_unlock_irqrestore(&priv->lock, flags); return count; @@ -819,7 +815,7 @@ static int iuu_uart_on(struct usb_serial_port *port) buf[0] = IUU_UART_ENABLE; buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF); buf[2] = (u8) (0x00FF & IUU_BAUD_9600); - buf[3] = (u8) (0x0F0 & IUU_TWO_STOP_BITS) | (0x07 & IUU_PARITY_EVEN); + buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN); status = bulk_immediate(port, buf, 4); if (status != IUU_OPERATION_OK) { @@ -946,19 +942,59 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud, return status; } -static int set_control_lines(struct usb_device *dev, u8 value) +static void iuu_set_termios(struct tty_struct *tty, + struct usb_serial_port *port, struct ktermios *old_termios) { - return 0; + const u32 supported_mask = CMSPAR|PARENB|PARODD; + + unsigned int cflag = tty->termios->c_cflag; + int status; + u32 actual; + u32 parity; + int csize = CS7; + int baud = 9600; /* Fixed for the moment */ + u32 newval = cflag & supported_mask; + + /* compute the parity parameter */ + parity = 0; + if (cflag & CMSPAR) { /* Using mark space */ + if (cflag & PARODD) + parity |= IUU_PARITY_SPACE; + else + parity |= IUU_PARITY_MARK; + } else if (!(cflag & PARENB)) { + parity |= IUU_PARITY_NONE; + csize = CS8; + } else if (cflag & PARODD) + parity |= IUU_PARITY_ODD; + else + parity |= IUU_PARITY_EVEN; + + parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT); + + /* set it */ + status = iuu_uart_baud(port, + (clockmode == 2) ? 16457 : 9600 * boost / 100, + &actual, parity); + + /* set the termios value to the real one, so the user now what has + * changed. We support few fields so its easies to copy the old hw + * settings back over and then adjust them + */ + if (old_termios) + tty_termios_copy_hw(tty->termios, old_termios); + if (status != 0) /* Set failed - return old bits */ + return; + /* Re-encode speed, parity and csize */ + tty_encode_baud_rate(tty, baud, baud); + tty->termios->c_cflag &= ~(supported_mask|CSIZE); + tty->termios->c_cflag |= newval | csize; } -static void iuu_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void iuu_close(struct usb_serial_port *port) { /* iuu_led (port,255,0,0,0); */ struct usb_serial *serial; - struct iuu_private *priv = usb_get_serial_port_data(port); - unsigned long flags; - unsigned int c_cflag; serial = port->serial; if (!serial) @@ -968,17 +1004,6 @@ static void iuu_close(struct tty_struct *tty, iuu_uart_off(port); if (serial->dev) { - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - priv = usb_get_serial_port_data(port); - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - set_control_lines(port->serial->dev, 0); - } - } /* free writebuf */ /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); @@ -1154,7 +1179,7 @@ static int iuu_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting read urb," " error %d\n", __func__, result); - iuu_close(tty, port, NULL); + iuu_close(port); return -EPROTO; } else { dbg("%s - rxcmd OK", __func__); @@ -1175,6 +1200,7 @@ static struct usb_serial_driver iuu_device = { .read_bulk_callback = iuu_uart_read_callback, .tiocmget = iuu_tiocmget, .tiocmset = iuu_tiocmset, + .set_termios = iuu_set_termios, .attach = iuu_startup, .shutdown = iuu_shutdown, }; diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 00daa8f7759a..f1195a98f316 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1298,8 +1298,16 @@ static inline void stop_urb(struct urb *urb) usb_kill_urb(urb); } -static void keyspan_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void keyspan_dtr_rts(struct usb_serial_port *port, int on) +{ + struct keyspan_port_private *p_priv = usb_get_serial_port_data(port); + + p_priv->rts_state = on; + p_priv->dtr_state = on; + keyspan_send_setup(port, 0); +} + +static void keyspan_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; @@ -1336,7 +1344,6 @@ static void keyspan_close(struct tty_struct *tty, stop_urb(p_priv->out_urbs[i]); } } - tty_port_tty_set(&port->port, NULL); } /* download the firmware to a pre-renumeration device */ diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h index 38b4582e0734..0d4569b60768 100644 --- a/drivers/usb/serial/keyspan.h +++ b/drivers/usb/serial/keyspan.h @@ -38,9 +38,8 @@ static int keyspan_open (struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void keyspan_close (struct tty_struct *tty, - struct usb_serial_port *port, - struct file *filp); +static void keyspan_close (struct usb_serial_port *port); +static void keyspan_dtr_rts (struct usb_serial_port *port, int on); static int keyspan_startup (struct usb_serial *serial); static void keyspan_shutdown (struct usb_serial *serial); static int keyspan_write_room (struct tty_struct *tty); @@ -562,6 +561,7 @@ static struct usb_serial_driver keyspan_1port_device = { .num_ports = 1, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, @@ -582,6 +582,7 @@ static struct usb_serial_driver keyspan_2port_device = { .num_ports = 2, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, @@ -602,6 +603,7 @@ static struct usb_serial_driver keyspan_4port_device = { .num_ports = 4, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index bf1ae247da66..ab769dbea1b3 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -651,6 +651,35 @@ static int keyspan_pda_chars_in_buffer(struct tty_struct *tty) } +static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on) +{ + struct usb_serial *serial = port->serial; + + if (serial->dev) { + if (on) + keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2)); + else + keyspan_pda_set_modem_info(serial, 0); + } +} + +static int keyspan_pda_carrier_raised(struct usb_serial_port *port) +{ + struct usb_serial *serial = port->serial; + unsigned char modembits; + + /* If we can read the modem status and the DCD is low then + carrier is not raised yet */ + if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) { + if (!(modembits & (1>>6))) + return 0; + } + /* Carrier raised, or we failed (eg disconnected) so + progress accordingly */ + return 1; +} + + static int keyspan_pda_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) { @@ -682,13 +711,6 @@ static int keyspan_pda_open(struct tty_struct *tty, priv->tx_room = room; priv->tx_throttled = room ? 0 : 1; - /* the normal serial device seems to always turn on DTR and RTS here, - so do the same */ - if (tty && (tty->termios->c_cflag & CBAUD)) - keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2)); - else - keyspan_pda_set_modem_info(serial, 0); - /*Start reading from the device*/ port->interrupt_in_urb->dev = serial->dev; rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); @@ -700,19 +722,11 @@ static int keyspan_pda_open(struct tty_struct *tty, error: return rc; } - - -static void keyspan_pda_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void keyspan_pda_close(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; if (serial->dev) { - /* the normal serial device seems to always shut - off DTR and RTS now */ - if (tty->termios->c_cflag & HUPCL) - keyspan_pda_set_modem_info(serial, 0); - /* shutdown our bulk reads and writes */ usb_kill_urb(port->write_urb); usb_kill_urb(port->interrupt_in_urb); @@ -839,6 +853,8 @@ static struct usb_serial_driver keyspan_pda_device = { .usb_driver = &keyspan_pda_driver, .id_table = id_table_std, .num_ports = 1, + .dtr_rts = keyspan_pda_dtr_rts, + .carrier_raised = keyspan_pda_carrier_raised, .open = keyspan_pda_open, .close = keyspan_pda_close, .write = keyspan_pda_write, diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index fcd9082f3e7f..fa817c66b3e8 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -76,8 +76,7 @@ static int klsi_105_startup(struct usb_serial *serial); static void klsi_105_shutdown(struct usb_serial *serial); static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void klsi_105_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void klsi_105_close(struct usb_serial_port *port); static int klsi_105_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void klsi_105_write_bulk_callback(struct urb *urb); @@ -447,8 +446,7 @@ exit: } /* klsi_105_open */ -static void klsi_105_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void klsi_105_close(struct usb_serial_port *port) { struct klsi_105_private *priv = usb_get_serial_port_data(port); int rc; diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index c148544953b3..6b570498287f 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -72,8 +72,7 @@ static int kobil_startup(struct usb_serial *serial); static void kobil_shutdown(struct usb_serial *serial); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void kobil_close(struct usb_serial_port *port); static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int kobil_write_room(struct tty_struct *tty); @@ -209,7 +208,7 @@ static void kobil_shutdown(struct usb_serial *serial) for (i = 0; i < serial->num_ports; ++i) { while (serial->port[i]->port.count > 0) - kobil_close(NULL, serial->port[i], NULL); + kobil_close(serial->port[i]); kfree(usb_get_serial_port_data(serial->port[i])); usb_set_serial_port_data(serial->port[i], NULL); } @@ -346,11 +345,11 @@ static int kobil_open(struct tty_struct *tty, } -static void kobil_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void kobil_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); + /* FIXME: Add rts/dtr methods */ if (port->write_urb) { usb_kill_urb(port->write_urb); usb_free_urb(port->write_urb); diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index 82930a7d5093..873795548fc0 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -95,8 +95,8 @@ static int mct_u232_startup(struct usb_serial *serial); static void mct_u232_shutdown(struct usb_serial *serial); static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void mct_u232_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void mct_u232_close(struct usb_serial_port *port); +static void mct_u232_dtr_rts(struct usb_serial_port *port, int on); static void mct_u232_read_int_callback(struct urb *urb); static void mct_u232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); @@ -140,6 +140,7 @@ static struct usb_serial_driver mct_u232_device = { .num_ports = 1, .open = mct_u232_open, .close = mct_u232_close, + .dtr_rts = mct_u232_dtr_rts, .throttle = mct_u232_throttle, .unthrottle = mct_u232_unthrottle, .read_int_callback = mct_u232_read_int_callback, @@ -496,29 +497,29 @@ error: return retval; } /* mct_u232_open */ - -static void mct_u232_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mct_u232_dtr_rts(struct usb_serial_port *port, int on) { - unsigned int c_cflag; unsigned int control_state; struct mct_u232_private *priv = usb_get_serial_port_data(port); - dbg("%s port %d", __func__, port->number); - if (tty) { - c_cflag = tty->termios->c_cflag; - mutex_lock(&port->serial->disc_mutex); - if (c_cflag & HUPCL && !port->serial->disconnected) { - /* drop DTR and RTS */ - spin_lock_irq(&priv->lock); + mutex_lock(&port->serial->disc_mutex); + if (!port->serial->disconnected) { + /* drop DTR and RTS */ + spin_lock_irq(&priv->lock); + if (on) + priv->control_state |= TIOCM_DTR | TIOCM_RTS; + else priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS); - control_state = priv->control_state; - spin_unlock_irq(&priv->lock); - mct_u232_set_modem_ctrl(port->serial, control_state); - } - mutex_unlock(&port->serial->disc_mutex); + control_state = priv->control_state; + spin_unlock_irq(&priv->lock); + mct_u232_set_modem_ctrl(port->serial, control_state); } + mutex_unlock(&port->serial->disc_mutex); +} +static void mct_u232_close(struct usb_serial_port *port) +{ + dbg("%s port %d", __func__, port->number); if (port->serial->dev) { /* shutdown our urbs */ diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 24e3b5d4b4d4..9e1a013ee7f6 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -533,8 +533,7 @@ static int mos7720_chars_in_buffer(struct tty_struct *tty) return chars; } -static void mos7720_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mos7720_close(struct usb_serial_port *port) { struct usb_serial *serial; struct moschip_port *mos7720_port; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 84fb1dcd30dc..10b78a37214f 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1135,54 +1135,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty) } -/************************************************************************ - * - * mos7840_block_until_tx_empty - * - * This function will block the close until one of the following: - * 1. TX count are 0 - * 2. The mos7840 has stopped - * 3. A timeout of 3 seconds without activity has expired - * - ************************************************************************/ -static void mos7840_block_until_tx_empty(struct tty_struct *tty, - struct moschip_port *mos7840_port) -{ - int timeout = HZ / 10; - int wait = 30; - int count; - - while (1) { - - count = mos7840_chars_in_buffer(tty); - - /* Check for Buffer status */ - if (count <= 0) - return; - - /* Block the thread for a while */ - interruptible_sleep_on_timeout(&mos7840_port->wait_chase, - timeout); - - /* No activity.. count down section */ - wait--; - if (wait == 0) { - dbg("%s - TIMEOUT", __func__); - return; - } else { - /* Reset timeout value back to seconds */ - wait = 30; - } - } -} - /***************************************************************************** * mos7840_close * this function is called by the tty driver when a port is closed *****************************************************************************/ -static void mos7840_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mos7840_close(struct usb_serial_port *port) { struct usb_serial *serial; struct moschip_port *mos7840_port; @@ -1223,10 +1181,6 @@ static void mos7840_close(struct tty_struct *tty, } } - if (serial->dev) - /* flush and block until tx is empty */ - mos7840_block_until_tx_empty(tty, mos7840_port); - /* While closing port, shutdown all bulk read, write * * and interrupt read if they exists */ if (serial->dev) { diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index bcdcbb822705..f5f3751a888c 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -98,8 +98,7 @@ static int navman_open(struct tty_struct *tty, return result; } -static void navman_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void navman_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index df6539712726..1104617334f5 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -66,8 +66,7 @@ static int debug; /* function prototypes */ static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void omninet_close(struct usb_serial_port *port); static void omninet_read_bulk_callback(struct urb *urb); static void omninet_write_bulk_callback(struct urb *urb); static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, @@ -189,8 +188,7 @@ static int omninet_open(struct tty_struct *tty, return result; } -static void omninet_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void omninet_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); usb_kill_urb(port->read_urb); diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index b500ad10b758..c20480aa9755 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -173,8 +173,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port, return result; } -static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void opticon_close(struct usb_serial_port *port) { struct opticon_private *priv = usb_get_serial_data(port->serial); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7817b82889ca..a16d69fadba1 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -45,8 +45,9 @@ /* Function prototypes */ static int option_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void option_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void option_close(struct usb_serial_port *port); +static void option_dtr_rts(struct usb_serial_port *port, int on); + static int option_startup(struct usb_serial *serial); static void option_shutdown(struct usb_serial *serial); static int option_write_room(struct tty_struct *tty); @@ -61,7 +62,7 @@ static void option_set_termios(struct tty_struct *tty, static int option_tiocmget(struct tty_struct *tty, struct file *file); static int option_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); -static int option_send_setup(struct tty_struct *tty, struct usb_serial_port *port); +static int option_send_setup(struct usb_serial_port *port); static int option_suspend(struct usb_serial *serial, pm_message_t message); static int option_resume(struct usb_serial *serial); @@ -551,6 +552,7 @@ static struct usb_serial_driver option_1port_device = { .num_ports = 1, .open = option_open, .close = option_close, + .dtr_rts = option_dtr_rts, .write = option_write, .write_room = option_write_room, .chars_in_buffer = option_chars_in_buffer, @@ -630,7 +632,7 @@ static void option_set_termios(struct tty_struct *tty, dbg("%s", __func__); /* Doesn't support option setting */ tty_termios_copy_hw(tty->termios, old_termios); - option_send_setup(tty, port); + option_send_setup(port); } static int option_tiocmget(struct tty_struct *tty, struct file *file) @@ -669,7 +671,7 @@ static int option_tiocmset(struct tty_struct *tty, struct file *file, portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; - return option_send_setup(tty, port); + return option_send_setup(port); } /* Write */ @@ -897,10 +899,6 @@ static int option_open(struct tty_struct *tty, dbg("%s", __func__); - /* Set some sane defaults */ - portdata->rts_state = 1; - portdata->dtr_state = 1; - /* Reset low level data toggle and start reading from endpoints */ for (i = 0; i < N_IN_URB; i++) { urb = portdata->in_urbs[i]; @@ -936,37 +934,43 @@ static int option_open(struct tty_struct *tty, usb_pipeout(urb->pipe), 0); */ } - option_send_setup(tty, port); + option_send_setup(port); return 0; } -static void option_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void option_dtr_rts(struct usb_serial_port *port, int on) { - int i; struct usb_serial *serial = port->serial; struct option_port_private *portdata; dbg("%s", __func__); portdata = usb_get_serial_port_data(port); + mutex_lock(&serial->disc_mutex); + portdata->rts_state = on; + portdata->dtr_state = on; + if (serial->dev) + option_send_setup(port); + mutex_unlock(&serial->disc_mutex); +} - portdata->rts_state = 0; - portdata->dtr_state = 0; - if (serial->dev) { - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - option_send_setup(tty, port); - mutex_unlock(&serial->disc_mutex); +static void option_close(struct usb_serial_port *port) +{ + int i; + struct usb_serial *serial = port->serial; + struct option_port_private *portdata; + + dbg("%s", __func__); + portdata = usb_get_serial_port_data(port); + if (serial->dev) { /* Stop reading/writing urbs */ for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); for (i = 0; i < N_OUT_URB; i++) usb_kill_urb(portdata->out_urbs[i]); } - tty_port_tty_set(&port->port, NULL); } /* Helper functions used by option_setup_urbs */ @@ -1032,28 +1036,24 @@ static void option_setup_urbs(struct usb_serial *serial) * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN * CDC. */ -static int option_send_setup(struct tty_struct *tty, - struct usb_serial_port *port) +static int option_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct option_port_private *portdata; int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber; + int val = 0; dbg("%s", __func__); portdata = usb_get_serial_port_data(port); - if (tty) { - int val = 0; - if (portdata->dtr_state) - val |= 0x01; - if (portdata->rts_state) - val |= 0x02; + if (portdata->dtr_state) + val |= 0x01; + if (portdata->rts_state) + val |= 0x02; - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT); - } - return 0; + return usb_control_msg(serial->dev, + usb_rcvctrlpipe(serial->dev, 0), + 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT); } static int option_startup(struct usb_serial *serial) diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index ba551f00f16f..7de54781fe61 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -143,8 +143,7 @@ struct oti6858_control_pkt { /* function prototypes */ static int oti6858_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void oti6858_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void oti6858_close(struct usb_serial_port *port); static void oti6858_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); static int oti6858_ioctl(struct tty_struct *tty, struct file *file, @@ -622,67 +621,30 @@ static int oti6858_open(struct tty_struct *tty, if (result != 0) { dev_err(&port->dev, "%s(): usb_submit_urb() failed" " with error %d\n", __func__, result); - oti6858_close(tty, port, NULL); + oti6858_close(port); return -EPROTO; } /* setup termios */ if (tty) oti6858_set_termios(tty, port, &tmp_termios); - + port->port.drain_delay = 256; /* FIXME: check the FIFO length */ return 0; } -static void oti6858_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void oti6858_close(struct usb_serial_port *port) { struct oti6858_private *priv = usb_get_serial_port_data(port); unsigned long flags; - long timeout; - wait_queue_t wait; dbg("%s(port = %d)", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = 30 * HZ; /* PL2303_CLOSING_WAIT */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - dbg("%s(): entering wait loop", __func__); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (oti6858_buf_data_avail(priv->buf) == 0 - || timeout == 0 || signal_pending(current) - || port->serial->disconnected) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - dbg("%s(): after wait loop", __func__); - /* clear out any remaining data in the buffer */ oti6858_buf_clear(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device */ - /* (this is long enough for the entire 256 byte */ - /* pl2303 hardware buffer to drain with no flow */ - /* control for data rates of 1200 bps or more, */ - /* for lower rates we should really know how much */ - /* data is in the buffer to compute a delay */ - /* that is not unnecessarily long) */ - /* FIXME - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560)/bps,HZ/10); - else - */ - timeout = 2*HZ; - schedule_timeout_interruptible(timeout); - dbg("%s(): after schedule_timeout_interruptible()", __func__); + dbg("%s(): after buf_clear()", __func__); /* cancel scheduled setup */ cancel_delayed_work(&priv->delayed_setup_work); @@ -694,15 +656,6 @@ static void oti6858_close(struct tty_struct *tty, usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - - /* - if (tty && (tty->termios->c_cflag) & HUPCL) { - // drop DTR and RTS - spin_lock_irqsave(&priv->lock, flags); - priv->pending_setup.control &= ~CONTROL_MASK; - spin_unlock_irqrestore(&priv->lock, flags); - } - */ } static int oti6858_tiocmset(struct tty_struct *tty, struct file *file, diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 751a533a4347..e02dc3d643c7 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -652,69 +652,41 @@ static void pl2303_set_termios(struct tty_struct *tty, kfree(buf); } -static void pl2303_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void pl2303_dtr_rts(struct usb_serial_port *port, int on) +{ + struct pl2303_private *priv = usb_get_serial_port_data(port); + unsigned long flags; + u8 control; + + spin_lock_irqsave(&priv->lock, flags); + /* Change DTR and RTS */ + if (on) + priv->line_control |= (CONTROL_DTR | CONTROL_RTS); + else + priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS); + control = priv->line_control; + spin_unlock_irqrestore(&priv->lock, flags); + set_control_lines(port->serial->dev, control); +} + +static void pl2303_close(struct usb_serial_port *port) { struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = PL2303_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (pl2303_buf_data_avail(priv->buf) == 0 || - timeout == 0 || signal_pending(current) || - port->serial->disconnected) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); /* clear out any remaining data in the buffer */ pl2303_buf_clear(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device */ - /* (this is long enough for the entire 256 byte */ - /* pl2303 hardware buffer to drain with no flow */ - /* control for data rates of 1200 bps or more, */ - /* for lower rates we should really know how much */ - /* data is in the buffer to compute a delay */ - /* that is not unnecessarily long) */ - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560)/bps, HZ/10); - else - timeout = 2*HZ; - schedule_timeout_interruptible(timeout); - /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - set_control_lines(port->serial->dev, 0); - } - } } static int pl2303_open(struct tty_struct *tty, @@ -748,7 +720,7 @@ static int pl2303_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting read urb," " error %d\n", __func__, result); - pl2303_close(tty, port, NULL); + pl2303_close(port); return -EPROTO; } @@ -758,9 +730,10 @@ static int pl2303_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, result); - pl2303_close(tty, port, NULL); + pl2303_close(port); return -EPROTO; } + port->port.drain_delay = 256; return 0; } @@ -821,6 +794,14 @@ static int pl2303_tiocmget(struct tty_struct *tty, struct file *file) return result; } +static int pl2303_carrier_raised(struct usb_serial_port *port) +{ + struct pl2303_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & UART_DCD) + return 1; + return 0; +} + static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) { struct pl2303_private *priv = usb_get_serial_port_data(port); @@ -1125,6 +1106,8 @@ static struct usb_serial_driver pl2303_device = { .num_ports = 1, .open = pl2303_open, .close = pl2303_close, + .dtr_rts = pl2303_dtr_rts, + .carrier_raised = pl2303_carrier_raised, .write = pl2303_write, .ioctl = pl2303_ioctl, .break_ctl = pl2303_break_ctl, diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 913225c61610..17ac34f4d668 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -26,12 +26,10 @@ #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> -#include <linux/usb/ch9.h> #define SWIMS_USB_REQUEST_SetPower 0x00 #define SWIMS_USB_REQUEST_SetNmea 0x07 -/* per port private data */ #define N_IN_URB 4 #define N_OUT_URB 4 #define IN_BUFLEN 4096 @@ -39,6 +37,12 @@ static int debug; static int nmea; +/* Used in interface blacklisting */ +struct sierra_iface_info { + const u32 infolen; /* number of interface numbers on blacklist */ + const u8 *ifaceinfo; /* pointer to the array holding the numbers */ +}; + static int sierra_set_power_state(struct usb_device *udev, __u16 swiState) { int result; @@ -85,6 +89,23 @@ static int sierra_calc_num_ports(struct usb_serial *serial) return result; } +static int is_blacklisted(const u8 ifnum, + const struct sierra_iface_info *blacklist) +{ + const u8 *info; + int i; + + if (blacklist) { + info = blacklist->ifaceinfo; + + for (i = 0; i < blacklist->infolen; i++) { + if (info[i] == ifnum) + return 1; + } + } + return 0; +} + static int sierra_calc_interface(struct usb_serial *serial) { int interface; @@ -153,9 +174,25 @@ static int sierra_probe(struct usb_serial *serial, */ usb_set_serial_data(serial, (void *)num_ports); + /* ifnum could have changed - by calling usb_set_interface */ + ifnum = sierra_calc_interface(serial); + + if (is_blacklisted(ifnum, + (struct sierra_iface_info *)id->driver_info)) { + dev_dbg(&serial->dev->dev, + "Ignoring blacklisted interface #%d\n", ifnum); + return -ENODEV; + } + return result; } +static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11 }; +static const struct sierra_iface_info direct_ip_interface_blacklist = { + .infolen = ARRAY_SIZE(direct_ip_non_serial_ifaces), + .ifaceinfo = direct_ip_non_serial_ifaces, +}; + static struct usb_device_id id_table [] = { { USB_DEVICE(0x1199, 0x0017) }, /* Sierra Wireless EM5625 */ { USB_DEVICE(0x1199, 0x0018) }, /* Sierra Wireless MC5720 */ @@ -188,9 +225,11 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x1199, 0x6833) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x683A) }, /* Sierra Wireless MC8785 */ { USB_DEVICE(0x1199, 0x683B) }, /* Sierra Wireless MC8785 Composite */ - { USB_DEVICE(0x1199, 0x683C) }, /* Sierra Wireless MC8790 */ - { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8790 */ - { USB_DEVICE(0x1199, 0x683E) }, /* Sierra Wireless MC8790 */ + /* Sierra Wireless MC8790, MC8791, MC8792 Composite */ + { USB_DEVICE(0x1199, 0x683C) }, + { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8791 Composite */ + /* Sierra Wireless MC8790, MC8791, MC8792 */ + { USB_DEVICE(0x1199, 0x683E) }, { USB_DEVICE(0x1199, 0x6850) }, /* Sierra Wireless AirCard 880 */ { USB_DEVICE(0x1199, 0x6851) }, /* Sierra Wireless AirCard 881 */ { USB_DEVICE(0x1199, 0x6852) }, /* Sierra Wireless AirCard 880 E */ @@ -211,6 +250,10 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */ { USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */ + { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ + .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist + }, + { } }; MODULE_DEVICE_TABLE(usb, id_table); @@ -229,7 +272,6 @@ struct sierra_port_private { /* Input endpoints and buffers for this port */ struct urb *in_urbs[N_IN_URB]; - char *in_buffer[N_IN_URB]; /* Settings for the port */ int rts_state; /* Handshaking pins (outputs) */ @@ -240,57 +282,50 @@ struct sierra_port_private { int ri_state; }; -static int sierra_send_setup(struct tty_struct *tty, - struct usb_serial_port *port) +static int sierra_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; __u16 interface = 0; + int val = 0; dev_dbg(&port->dev, "%s", __func__); portdata = usb_get_serial_port_data(port); - if (tty) { - int val = 0; - if (portdata->dtr_state) - val |= 0x01; - if (portdata->rts_state) - val |= 0x02; - - /* If composite device then properly report interface */ - if (serial->num_ports == 1) { - interface = sierra_calc_interface(serial); - - /* Control message is sent only to interfaces with - * interrupt_in endpoints - */ - if (port->interrupt_in_urb) { - /* send control message */ - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, interface, - NULL, 0, USB_CTRL_SET_TIMEOUT); - } - } - - /* Otherwise the need to do non-composite mapping */ - else { - if (port->bulk_out_endpointAddress == 2) - interface = 0; - else if (port->bulk_out_endpointAddress == 4) - interface = 1; - else if (port->bulk_out_endpointAddress == 5) - interface = 2; + if (portdata->dtr_state) + val |= 0x01; + if (portdata->rts_state) + val |= 0x02; + /* If composite device then properly report interface */ + if (serial->num_ports == 1) { + interface = sierra_calc_interface(serial); + /* Control message is sent only to interfaces with + * interrupt_in endpoints + */ + if (port->interrupt_in_urb) { + /* send control message */ return usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), 0x22, 0x21, val, interface, NULL, 0, USB_CTRL_SET_TIMEOUT); - } } + /* Otherwise the need to do non-composite mapping */ + else { + if (port->bulk_out_endpointAddress == 2) + interface = 0; + else if (port->bulk_out_endpointAddress == 4) + interface = 1; + else if (port->bulk_out_endpointAddress == 5) + interface = 2; + return usb_control_msg(serial->dev, + usb_rcvctrlpipe(serial->dev, 0), + 0x22, 0x21, val, interface, + NULL, 0, USB_CTRL_SET_TIMEOUT); + } return 0; } @@ -299,7 +334,7 @@ static void sierra_set_termios(struct tty_struct *tty, { dev_dbg(&port->dev, "%s", __func__); tty_termios_copy_hw(tty->termios, old_termios); - sierra_send_setup(tty, port); + sierra_send_setup(port); } static int sierra_tiocmget(struct tty_struct *tty, struct file *file) @@ -338,7 +373,18 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file, portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; - return sierra_send_setup(tty, port); + return sierra_send_setup(port); +} + +static void sierra_release_urb(struct urb *urb) +{ + struct usb_serial_port *port; + if (urb) { + port = urb->context; + dev_dbg(&port->dev, "%s: %p\n", __func__, urb); + kfree(urb->transfer_buffer); + usb_free_urb(urb); + } } static void sierra_outdat_callback(struct urb *urb) @@ -465,7 +511,7 @@ static void sierra_indat_callback(struct urb *urb) " received", __func__); /* Resubmit urb so we continue receiving */ - if (port->port.count && status != -ESHUTDOWN) { + if (port->port.count && status != -ESHUTDOWN && status != -EPERM) { err = usb_submit_urb(urb, GFP_ATOMIC); if (err) dev_err(&port->dev, "resubmit read urb failed." @@ -557,67 +603,99 @@ static int sierra_write_room(struct tty_struct *tty) return 2048; } -static int sierra_open(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void sierra_stop_rx_urbs(struct usb_serial_port *port) { - struct sierra_port_private *portdata; - struct usb_serial *serial = port->serial; int i; - struct urb *urb; - int result; + struct sierra_port_private *portdata = usb_get_serial_port_data(port); - portdata = usb_get_serial_port_data(port); + for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) + usb_kill_urb(portdata->in_urbs[i]); - dev_dbg(&port->dev, "%s", __func__); + usb_kill_urb(port->interrupt_in_urb); +} - /* Set some sane defaults */ - portdata->rts_state = 1; - portdata->dtr_state = 1; +static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags) +{ + int ok_cnt; + int err = -EINVAL; + int i; + struct urb *urb; + struct sierra_port_private *portdata = usb_get_serial_port_data(port); - /* Reset low level data toggle and start reading from endpoints */ - for (i = 0; i < N_IN_URB; i++) { + ok_cnt = 0; + for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) { urb = portdata->in_urbs[i]; if (!urb) continue; - if (urb->dev != serial->dev) { - dev_dbg(&port->dev, "%s: dev %p != %p", - __func__, urb->dev, serial->dev); - continue; + err = usb_submit_urb(urb, mem_flags); + if (err) { + dev_err(&port->dev, "%s: submit urb failed: %d\n", + __func__, err); + } else { + ok_cnt++; } + } - /* - * make sure endpoint data toggle is synchronized with the - * device - */ - usb_clear_halt(urb->dev, urb->pipe); - - result = usb_submit_urb(urb, GFP_KERNEL); - if (result) { - dev_err(&port->dev, "submit urb %d failed (%d) %d\n", - i, result, urb->transfer_buffer_length); + if (ok_cnt && port->interrupt_in_urb) { + err = usb_submit_urb(port->interrupt_in_urb, mem_flags); + if (err) { + dev_err(&port->dev, "%s: submit intr urb failed: %d\n", + __func__, err); } } - sierra_send_setup(tty, port); + if (ok_cnt > 0) /* at least one rx urb submitted */ + return 0; + else + return err; +} + +static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint, + int dir, void *ctx, int len, + gfp_t mem_flags, + usb_complete_t callback) +{ + struct urb *urb; + u8 *buf; + + if (endpoint == -1) + return NULL; - /* start up the interrupt endpoint if we have one */ - if (port->interrupt_in_urb) { - result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); - if (result) - dev_err(&port->dev, "submit irq_in urb failed %d\n", - result); + urb = usb_alloc_urb(0, mem_flags); + if (urb == NULL) { + dev_dbg(&serial->dev->dev, "%s: alloc for endpoint %d failed\n", + __func__, endpoint); + return NULL; } - return 0; + + buf = kmalloc(len, mem_flags); + if (buf) { + /* Fill URB using supplied data */ + usb_fill_bulk_urb(urb, serial->dev, + usb_sndbulkpipe(serial->dev, endpoint) | dir, + buf, len, callback, ctx); + + /* debug */ + dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__, + dir == USB_DIR_IN ? 'i' : 'o', urb, buf); + } else { + dev_dbg(&serial->dev->dev, "%s %c u:%p d:%p\n", __func__, + dir == USB_DIR_IN ? 'i' : 'o', urb, buf); + + sierra_release_urb(urb); + urb = NULL; + } + + return urb; } -static void sierra_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void sierra_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; - dev_dbg(&port->dev, "%s", __func__); + dev_dbg(&port->dev, "%s\n", __func__); portdata = usb_get_serial_port_data(port); portdata->rts_state = 0; @@ -626,25 +704,83 @@ static void sierra_close(struct tty_struct *tty, if (serial->dev) { mutex_lock(&serial->disc_mutex); if (!serial->disconnected) - sierra_send_setup(tty, port); + sierra_send_setup(port); mutex_unlock(&serial->disc_mutex); - /* Stop reading/writing urbs */ - for (i = 0; i < N_IN_URB; i++) - usb_kill_urb(portdata->in_urbs[i]); + /* Stop reading urbs */ + sierra_stop_rx_urbs(port); + /* .. and release them */ + for (i = 0; i < N_IN_URB; i++) { + sierra_release_urb(portdata->in_urbs[i]); + portdata->in_urbs[i] = NULL; + } } +} - usb_kill_urb(port->interrupt_in_urb); - tty_port_tty_set(&port->port, NULL); +static int sierra_open(struct tty_struct *tty, + struct usb_serial_port *port, struct file *filp) +{ + struct sierra_port_private *portdata; + struct usb_serial *serial = port->serial; + int i; + int err; + int endpoint; + struct urb *urb; + + portdata = usb_get_serial_port_data(port); + + dev_dbg(&port->dev, "%s", __func__); + + /* Set some sane defaults */ + portdata->rts_state = 1; + portdata->dtr_state = 1; + + + endpoint = port->bulk_in_endpointAddress; + for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) { + urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port, + IN_BUFLEN, GFP_KERNEL, + sierra_indat_callback); + portdata->in_urbs[i] = urb; + } + /* clear halt condition */ + usb_clear_halt(serial->dev, + usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN); + + err = sierra_submit_rx_urbs(port, GFP_KERNEL); + if (err) { + /* get rid of everything as in close */ + sierra_close(port); + return err; + } + sierra_send_setup(port); + + return 0; +} + + +static void sierra_dtr_rts(struct usb_serial_port *port, int on) +{ + struct usb_serial *serial = port->serial; + struct sierra_port_private *portdata; + + portdata = usb_get_serial_port_data(port); + portdata->rts_state = on; + portdata->dtr_state = on; + + if (serial->dev) { + mutex_lock(&serial->disc_mutex); + if (!serial->disconnected) + sierra_send_setup(port); + mutex_unlock(&serial->disc_mutex); + } } static int sierra_startup(struct usb_serial *serial) { struct usb_serial_port *port; struct sierra_port_private *portdata; - struct urb *urb; int i; - int j; dev_dbg(&serial->dev->dev, "%s", __func__); @@ -666,34 +802,8 @@ static int sierra_startup(struct usb_serial *serial) return -ENOMEM; } spin_lock_init(&portdata->lock); - for (j = 0; j < N_IN_URB; j++) { - portdata->in_buffer[j] = kmalloc(IN_BUFLEN, GFP_KERNEL); - if (!portdata->in_buffer[j]) { - for (--j; j >= 0; j--) - kfree(portdata->in_buffer[j]); - kfree(portdata); - return -ENOMEM; - } - } - + /* Set the port private data pointer */ usb_set_serial_port_data(port, portdata); - - /* initialize the in urbs */ - for (j = 0; j < N_IN_URB; ++j) { - urb = usb_alloc_urb(0, GFP_KERNEL); - if (urb == NULL) { - dev_dbg(&port->dev, "%s: alloc for in " - "port failed.", __func__); - continue; - } - /* Fill URB using supplied data. */ - usb_fill_bulk_urb(urb, serial->dev, - usb_rcvbulkpipe(serial->dev, - port->bulk_in_endpointAddress), - portdata->in_buffer[j], IN_BUFLEN, - sierra_indat_callback, port); - portdata->in_urbs[j] = urb; - } } return 0; @@ -701,7 +811,7 @@ static int sierra_startup(struct usb_serial *serial) static void sierra_shutdown(struct usb_serial *serial) { - int i, j; + int i; struct usb_serial_port *port; struct sierra_port_private *portdata; @@ -714,12 +824,6 @@ static void sierra_shutdown(struct usb_serial *serial) portdata = usb_get_serial_port_data(port); if (!portdata) continue; - - for (j = 0; j < N_IN_URB; j++) { - usb_kill_urb(portdata->in_urbs[j]); - usb_free_urb(portdata->in_urbs[j]); - kfree(portdata->in_buffer[j]); - } kfree(portdata); usb_set_serial_port_data(port, NULL); } @@ -737,6 +841,7 @@ static struct usb_serial_driver sierra_device = { .probe = sierra_probe, .open = sierra_open, .close = sierra_close, + .dtr_rts = sierra_dtr_rts, .write = sierra_write, .write_room = sierra_write_room, .set_termios = sierra_set_termios, diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 5e7528cc81a8..8f7ed8f13996 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -446,66 +446,47 @@ static void spcp8x5_set_workMode(struct usb_device *dev, u16 value, "RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret); } +static int spcp8x5_carrier_raised(struct usb_serial_port *port) +{ + struct spcp8x5_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & MSR_STATUS_LINE_DCD) + return 1; + return 0; +} + +static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on) +{ + struct spcp8x5_private *priv = usb_get_serial_port_data(port); + unsigned long flags; + u8 control; + + spin_lock_irqsave(&priv->lock, flags); + if (on) + priv->line_control = MCR_CONTROL_LINE_DTR + | MCR_CONTROL_LINE_RTS; + else + priv->line_control &= ~ (MCR_CONTROL_LINE_DTR + | MCR_CONTROL_LINE_RTS); + control = priv->line_control; + spin_unlock_irqrestore(&priv->lock, flags); + spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type); +} + /* close the serial port. We should wait for data sending to device 1st and * then kill all urb. */ -static void spcp8x5_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void spcp8x5_close(struct usb_serial_port *port) { struct spcp8x5_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; int result; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = SPCP8x5_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (ringbuf_avail_data(priv->buf) == 0 || - timeout == 0 || signal_pending(current)) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - /* clear out any remaining data in the buffer */ clear_ringbuf(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device (this is long enough - * for the entire all byte spcp8x5 hardware buffer to drain with no - * flow control for data rates of 1200 bps or more, for lower rates we - * should really know how much data is in the buffer to compute a delay - * that is not unnecessarily long) */ - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560) / bps, HZ/10); - else - timeout = 2*HZ; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(timeout); - - /* clear control lines */ - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type); - } - } - /* kill urb */ if (port->write_urb != NULL) { result = usb_unlink_urb(port->write_urb); @@ -665,13 +646,6 @@ static int spcp8x5_open(struct tty_struct *tty, if (ret) return ret; - spin_lock_irqsave(&priv->lock, flags); - if (tty && (tty->termios->c_cflag & CBAUD)) - priv->line_control = MCR_DTR | MCR_RTS; - else - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type); /* Setup termios */ @@ -691,9 +665,10 @@ static int spcp8x5_open(struct tty_struct *tty, port->read_urb->dev = serial->dev; ret = usb_submit_urb(port->read_urb, GFP_KERNEL); if (ret) { - spcp8x5_close(tty, port, NULL); + spcp8x5_close(port); return -EPROTO; } + port->port.drain_delay = 256; return 0; } @@ -1033,6 +1008,8 @@ static struct usb_serial_driver spcp8x5_device = { .num_ports = 1, .open = spcp8x5_open, .close = spcp8x5_close, + .dtr_rts = spcp8x5_dtr_rts, + .carrier_raised = spcp8x5_carrier_raised, .write = spcp8x5_write, .set_termios = spcp8x5_set_termios, .ioctl = spcp8x5_ioctl, diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index 69879e437940..8b07ebc6baeb 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -152,8 +152,7 @@ static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port, return result; } -static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void symbol_close(struct usb_serial_port *port) { struct symbol_private *priv = usb_get_serial_data(port->serial); diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 0a64bac306ee..42cb04c403be 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -100,8 +100,7 @@ static int ti_startup(struct usb_serial *serial); static void ti_shutdown(struct usb_serial *serial); static int ti_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *file); -static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *file); +static void ti_close(struct usb_serial_port *port); static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *data, int count); static int ti_write_room(struct tty_struct *tty); @@ -647,8 +646,7 @@ release_lock: } -static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *file) +static void ti_close(struct usb_serial_port *port) { struct ti_device *tdev; struct ti_port *tport; diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index f331e2bde88a..1967a7edc10c 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -238,9 +238,11 @@ static int serial_open (struct tty_struct *tty, struct file *filp) goto bailout_interface_put; mutex_unlock(&serial->disc_mutex); } - mutex_unlock(&port->mutex); - return 0; + /* Now do the correct tty layer semantics */ + retval = tty_port_block_til_ready(&port->port, tty, filp); + if (retval == 0) + return 0; bailout_interface_put: usb_autopm_put_interface(serial->interface); @@ -259,64 +261,89 @@ bailout_serial_put: return retval; } -static void serial_close(struct tty_struct *tty, struct file *filp) +/** + * serial_do_down - shut down hardware + * @port: port to shut down + * + * Shut down a USB port unless it is the console. We never shut down the + * console hardware as it will always be in use. + * + * Don't free any resources at this point + */ +static void serial_do_down(struct usb_serial_port *port) { - struct usb_serial_port *port = tty->driver_data; + struct usb_serial_driver *drv = port->serial->type; struct usb_serial *serial; struct module *owner; - int count; - if (!port) + /* The console is magical, do not hang up the console hardware + or there will be tears */ + if (port->console) return; - dbg("%s - port %d", __func__, port->number); - mutex_lock(&port->mutex); serial = port->serial; owner = serial->type->driver.owner; - if (port->port.count == 0) { - mutex_unlock(&port->mutex); - return; - } - - if (port->port.count == 1) - /* only call the device specific close if this - * port is being closed by the last owner. Ensure we do - * this before we drop the port count. The call is protected - * by the port mutex - */ - serial->type->close(tty, port, filp); - - if (port->port.count == (port->console ? 2 : 1)) { - struct tty_struct *tty = tty_port_tty_get(&port->port); - if (tty) { - /* We must do this before we drop the port count to - zero. */ - if (tty->driver_data) - tty->driver_data = NULL; - tty_port_tty_set(&port->port, NULL); - tty_kref_put(tty); - } - } + if (drv->close) + drv->close(port); - --port->port.count; - count = port->port.count; mutex_unlock(&port->mutex); - put_device(&port->dev); +} + +/** + * serial_do_free - free resources post close/hangup + * @port: port to free up + * + * Do the resource freeing and refcount dropping for the port. We must + * be careful about ordering and we must avoid freeing up the console. + */ +static void serial_do_free(struct usb_serial_port *port) +{ + struct usb_serial *serial; + struct module *owner; + + /* The console is magical, do not hang up the console hardware + or there will be tears */ + if (port->console) + return; + + serial = port->serial; + owner = serial->type->driver.owner; + put_device(&port->dev); /* Mustn't dereference port any more */ - if (count == 0) { - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - usb_autopm_put_interface(serial->interface); - mutex_unlock(&serial->disc_mutex); - } + mutex_lock(&serial->disc_mutex); + if (!serial->disconnected) + usb_autopm_put_interface(serial->interface); + mutex_unlock(&serial->disc_mutex); usb_serial_put(serial); - /* Mustn't dereference serial any more */ - if (count == 0) - module_put(owner); + module_put(owner); +} + +static void serial_close(struct tty_struct *tty, struct file *filp) +{ + struct usb_serial_port *port = tty->driver_data; + + dbg("%s - port %d", __func__, port->number); + + + if (tty_port_close_start(&port->port, tty, filp) == 0) + return; + + serial_do_down(port); + tty_port_close_end(&port->port, tty); + tty_port_tty_set(&port->port, NULL); + serial_do_free(port); +} + +static void serial_hangup(struct tty_struct *tty) +{ + struct usb_serial_port *port = tty->driver_data; + serial_do_down(port); + tty_port_hangup(&port->port); + serial_do_free(port); } static int serial_write(struct tty_struct *tty, const unsigned char *buf, @@ -648,6 +675,29 @@ static struct usb_serial_driver *search_serial_device( return NULL; } +static int serial_carrier_raised(struct tty_port *port) +{ + struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); + struct usb_serial_driver *drv = p->serial->type; + if (drv->carrier_raised) + return drv->carrier_raised(p); + /* No carrier control - don't block */ + return 1; +} + +static void serial_dtr_rts(struct tty_port *port, int on) +{ + struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); + struct usb_serial_driver *drv = p->serial->type; + if (drv->dtr_rts) + drv->dtr_rts(p, on); +} + +static const struct tty_port_operations serial_port_ops = { + .carrier_raised = serial_carrier_raised, + .dtr_rts = serial_dtr_rts, +}; + int usb_serial_probe(struct usb_interface *interface, const struct usb_device_id *id) { @@ -841,6 +891,7 @@ int usb_serial_probe(struct usb_interface *interface, if (!port) goto probe_error; tty_port_init(&port->port); + port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); mutex_init(&port->mutex); @@ -1071,6 +1122,9 @@ void usb_serial_disconnect(struct usb_interface *interface) if (port) { struct tty_struct *tty = tty_port_tty_get(&port->port); if (tty) { + /* The hangup will occur asynchronously but + the object refcounts will sort out all the + cleanup */ tty_hangup(tty); tty_kref_put(tty); } @@ -1135,6 +1189,7 @@ static const struct tty_operations serial_ops = { .open = serial_open, .close = serial_close, .write = serial_write, + .hangup = serial_hangup, .write_room = serial_write_room, .ioctl = serial_ioctl, .set_termios = serial_set_termios, @@ -1147,6 +1202,7 @@ static const struct tty_operations serial_ops = { .proc_fops = &serial_proc_fops, }; + struct tty_driver *usb_serial_tty_driver; static int __init usb_serial_init(void) diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 5ac414bda718..b15f1c0e1d4a 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -38,8 +38,7 @@ /* function prototypes for a handspring visor */ static int visor_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void visor_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void visor_close(struct usb_serial_port *port); static int visor_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int visor_write_room(struct tty_struct *tty); @@ -324,8 +323,7 @@ exit: } -static void visor_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void visor_close(struct usb_serial_port *port) { struct visor_private *priv = usb_get_serial_port_data(port); unsigned char *transfer_buffer; diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 5335d3211c07..7c7295d09f34 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -147,8 +147,7 @@ static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_shutdown(struct usb_serial *serial); static int whiteheat_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void whiteheat_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void whiteheat_close(struct usb_serial_port *port); static int whiteheat_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); @@ -712,8 +711,7 @@ exit: } -static void whiteheat_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void whiteheat_close(struct usb_serial_port *port) { struct whiteheat_private *info = usb_get_serial_port_data(port); struct whiteheat_urb_wrap *wrap; @@ -723,31 +721,7 @@ static void whiteheat_close(struct tty_struct *tty, dbg("%s - port %d", __func__, port->number); - mutex_lock(&port->serial->disc_mutex); - /* filp is NULL when called from usb_serial_disconnect */ - if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) { - mutex_unlock(&port->serial->disc_mutex); - return; - } - mutex_unlock(&port->serial->disc_mutex); - - tty->closing = 1; - -/* - * Not currently in use; tty_wait_until_sent() calls - * serial_chars_in_buffer() which deadlocks on the second semaphore - * acquisition. This should be fixed at some point. Greg's been - * notified. - if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) { - tty_wait_until_sent(tty, CLOSING_DELAY); - } -*/ - - tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - firm_report_tx_done(port); - firm_close(port); /* shutdown our bulk reads and writes */ @@ -775,10 +749,7 @@ static void whiteheat_close(struct tty_struct *tty, } spin_unlock_irq(&info->lock); mutex_unlock(&info->deathwarrant); - stop_command_port(port->serial); - - tty->closing = 0; } @@ -26,10 +26,9 @@ #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/blktrace_api.h> -#include <trace/block.h> #include <scsi/sg.h> /* for struct sg_iovec */ -DEFINE_TRACE(block_split); +#include <trace/events/block.h> /* * Test patch to inline a certain number of bi_io_vec's inside the bio diff --git a/fs/buffer.c b/fs/buffer.c index 49106127a4aa..1864d0b63088 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(!buffer_locked(bh)); BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); + BUG_ON(buffer_delay(bh)); + BUG_ON(buffer_unwritten(bh)); /* * Mask in barrier bit for a write (could be either a WRITE or a diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c68edb969441..9b1d285f9fe6 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -557,8 +557,10 @@ static int __init init_devpts_fs(void) int err = register_filesystem(&devpts_fs_type); if (!err) { devpts_mnt = kern_mount(&devpts_fs_type); - if (IS_ERR(devpts_mnt)) + if (IS_ERR(devpts_mnt)) { err = PTR_ERR(devpts_mnt); + unregister_filesystem(&devpts_fs_type); + } } return err; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 5c4afe652245..e3c748faf2db 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1093,6 +1093,7 @@ failed_mount: brelse(bh); failed_sbi: sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); return ret; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 599dbfe504c3..d8b73d4abe3e 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2021,6 +2021,7 @@ failed_mount: brelse(bh); out_fail: sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); lock_kernel(); return ret; diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index a8ff003a00f7..8a34710ecf40 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -5,8 +5,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o migrate.o mballoc.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ + ext4_jbd2.o migrate.o mballoc.o block_validity.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 53c72ad85877..e2126d70dff5 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -19,7 +19,6 @@ #include <linux/buffer_head.h> #include "ext4.h" #include "ext4_jbd2.h" -#include "group.h" #include "mballoc.h" /* @@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) { int bit, bit_max; + ext4_group_t ngroups = ext4_get_groups_count(sb); unsigned free_blocks, group_blocks; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, bit_max += ext4_bg_num_gdb(sb, block_group); } - if (block_group == sbi->s_groups_count - 1) { + if (block_group == ngroups - 1) { /* * Even though mke2fs always initialize first and last group * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need @@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ group_blocks = ext4_blocks_count(sbi->s_es) - le32_to_cpu(sbi->s_es->s_first_data_block) - - (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); + (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); } else { group_blocks = EXT4_BLOCKS_PER_GROUP(sb); } @@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, { unsigned int group_desc; unsigned int offset; + ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (block_group >= sbi->s_groups_count) { + if (block_group >= ngroups) { ext4_error(sb, "ext4_get_group_desc", "block_group >= groups_count - " "block_group = %u, groups_count = %u", - block_group, sbi->s_groups_count); + block_group, ngroups); return NULL; } - smp_rmb(); group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); @@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) unlock_buffer(bh); return bh; } - spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); unlock_buffer(bh); return bh; } - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); if (buffer_uptodate(bh)) { /* * if not uninit if bh is uptodate, @@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, down_write(&grp->alloc_sem); for (i = 0, blocks_freed = 0; i < count; i++) { BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), bit + i, bitmap_bh->b_data)) { ext4_error(sb, __func__, "bit already cleared for block %llu", @@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, blocks_freed++; } } - spin_lock(sb_bgl_lock(sbi, block_group)); + ext4_lock_group(sb, block_group); blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); ext4_free_blks_set(sb, desc, blk_free_count); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); - spin_unlock(sb_bgl_lock(sbi, block_group)); + ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); if (sbi->s_log_groups_per_flex) { @@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) ext4_fsblk_t desc_count; struct ext4_group_desc *gdp; ext4_group_t i; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); #ifdef EXT4FS_DEBUG struct ext4_super_block *es; ext4_fsblk_t bitmap_count; @@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) bitmap_count = 0; gdp = NULL; - smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) @@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) return bitmap_count; #else desc_count = 0; - smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c new file mode 100644 index 000000000000..50784ef07563 --- /dev/null +++ b/fs/ext4/block_validity.c @@ -0,0 +1,244 @@ +/* + * linux/fs/ext4/block_validity.c + * + * Copyright (C) 2009 + * Theodore Ts'o (tytso@mit.edu) + * + * Track which blocks in the filesystem are metadata blocks that + * should never be used as data blocks by files or directories. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> +#include <linux/module.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/version.h> +#include <linux/blkdev.h> +#include <linux/mutex.h> +#include "ext4.h" + +struct ext4_system_zone { + struct rb_node node; + ext4_fsblk_t start_blk; + unsigned int count; +}; + +static struct kmem_cache *ext4_system_zone_cachep; + +int __init init_ext4_system_zone(void) +{ + ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, + SLAB_RECLAIM_ACCOUNT); + if (ext4_system_zone_cachep == NULL) + return -ENOMEM; + return 0; +} + +void exit_ext4_system_zone(void) +{ + kmem_cache_destroy(ext4_system_zone_cachep); +} + +static inline int can_merge(struct ext4_system_zone *entry1, + struct ext4_system_zone *entry2) +{ + if ((entry1->start_blk + entry1->count) == entry2->start_blk) + return 1; + return 0; +} + +/* + * Mark a range of blocks as belonging to the "system zone" --- that + * is, filesystem metadata blocks which should never be used by + * inodes. + */ +static int add_system_zone(struct ext4_sb_info *sbi, + ext4_fsblk_t start_blk, + unsigned int count) +{ + struct ext4_system_zone *new_entry = NULL, *entry; + struct rb_node **n = &sbi->system_blks.rb_node, *node; + struct rb_node *parent = NULL, *new_node = NULL; + + while (*n) { + parent = *n; + entry = rb_entry(parent, struct ext4_system_zone, node); + if (start_blk < entry->start_blk) + n = &(*n)->rb_left; + else if (start_blk >= (entry->start_blk + entry->count)) + n = &(*n)->rb_right; + else { + if (start_blk + count > (entry->start_blk + + entry->count)) + entry->count = (start_blk + count - + entry->start_blk); + new_node = *n; + new_entry = rb_entry(new_node, struct ext4_system_zone, + node); + break; + } + } + + if (!new_entry) { + new_entry = kmem_cache_alloc(ext4_system_zone_cachep, + GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->start_blk = start_blk; + new_entry->count = count; + new_node = &new_entry->node; + + rb_link_node(new_node, parent, n); + rb_insert_color(new_node, &sbi->system_blks); + } + + /* Can we merge to the left? */ + node = rb_prev(new_node); + if (node) { + entry = rb_entry(node, struct ext4_system_zone, node); + if (can_merge(entry, new_entry)) { + new_entry->start_blk = entry->start_blk; + new_entry->count += entry->count; + rb_erase(node, &sbi->system_blks); + kmem_cache_free(ext4_system_zone_cachep, entry); + } + } + + /* Can we merge to the right? */ + node = rb_next(new_node); + if (node) { + entry = rb_entry(node, struct ext4_system_zone, node); + if (can_merge(new_entry, entry)) { + new_entry->count += entry->count; + rb_erase(node, &sbi->system_blks); + kmem_cache_free(ext4_system_zone_cachep, entry); + } + } + return 0; +} + +static void debug_print_tree(struct ext4_sb_info *sbi) +{ + struct rb_node *node; + struct ext4_system_zone *entry; + int first = 1; + + printk(KERN_INFO "System zones: "); + node = rb_first(&sbi->system_blks); + while (node) { + entry = rb_entry(node, struct ext4_system_zone, node); + printk("%s%llu-%llu", first ? "" : ", ", + entry->start_blk, entry->start_blk + entry->count - 1); + first = 0; + node = rb_next(node); + } + printk("\n"); +} + +int ext4_setup_system_zone(struct super_block *sb) +{ + ext4_group_t ngroups = ext4_get_groups_count(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp; + ext4_group_t i; + int flex_size = ext4_flex_bg_size(sbi); + int ret; + + if (!test_opt(sb, BLOCK_VALIDITY)) { + if (EXT4_SB(sb)->system_blks.rb_node) + ext4_release_system_zone(sb); + return 0; + } + if (EXT4_SB(sb)->system_blks.rb_node) + return 0; + + for (i=0; i < ngroups; i++) { + if (ext4_bg_has_super(sb, i) && + ((i < 5) || ((i % flex_size) == 0))) + add_system_zone(sbi, ext4_group_first_block_no(sb, i), + sbi->s_gdb_count + 1); + gdp = ext4_get_group_desc(sb, i, NULL); + ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); + if (ret) + return ret; + ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1); + if (ret) + return ret; + ret = add_system_zone(sbi, ext4_inode_table(sb, gdp), + sbi->s_itb_per_group); + if (ret) + return ret; + } + + if (test_opt(sb, DEBUG)) + debug_print_tree(EXT4_SB(sb)); + return 0; +} + +/* Called when the filesystem is unmounted */ +void ext4_release_system_zone(struct super_block *sb) +{ + struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node; + struct rb_node *parent; + struct ext4_system_zone *entry; + + while (n) { + /* Do the node's children first */ + if (n->rb_left) { + n = n->rb_left; + continue; + } + if (n->rb_right) { + n = n->rb_right; + continue; + } + /* + * The node has no children; free it, and then zero + * out parent's link to it. Finally go to the + * beginning of the loop and try to free the parent + * node. + */ + parent = rb_parent(n); + entry = rb_entry(n, struct ext4_system_zone, node); + kmem_cache_free(ext4_system_zone_cachep, entry); + if (!parent) + EXT4_SB(sb)->system_blks.rb_node = NULL; + else if (parent->rb_left == n) + parent->rb_left = NULL; + else if (parent->rb_right == n) + parent->rb_right = NULL; + n = parent; + } + EXT4_SB(sb)->system_blks.rb_node = NULL; +} + +/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with filesystem metadata blocks. + */ +int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, + unsigned int count) +{ + struct ext4_system_zone *entry; + struct rb_node *n = sbi->system_blks.rb_node; + + if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count > ext4_blocks_count(sbi->s_es))) + return 0; + while (n) { + entry = rb_entry(n, struct ext4_system_zone, node); + if (start_blk + count - 1 < entry->start_blk) + n = n->rb_left; + else if (start_blk >= (entry->start_blk + entry->count)) + n = n->rb_right; + else + return 0; + } + return 1; +} + diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b64789929a65..9dc93168e262 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp, struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, - 0, 0, 0); + err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); if (err > 0) { pgoff_t index = map_bh.b_blocknr >> (PAGE_CACHE_SHIFT - inode->i_blkbits); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d0f15ef56de1..cc7d5edc38c9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -21,7 +21,14 @@ #include <linux/magic.h> #include <linux/jbd2.h> #include <linux/quota.h> -#include "ext4_i.h" +#include <linux/rwsem.h> +#include <linux/rbtree.h> +#include <linux/seqlock.h> +#include <linux/mutex.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/blockgroup_lock.h> +#include <linux/percpu_counter.h> /* * The fourth extended filesystem constants/structures @@ -46,6 +53,19 @@ #define ext4_debug(f, a...) do {} while (0) #endif +/* data type for block offset of block group */ +typedef int ext4_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long long ext4_fsblk_t; + +/* data type for file logical block number */ +typedef __u32 ext4_lblk_t; + +/* data type for block group number */ +typedef unsigned int ext4_group_t; + + /* prefer goal again. length */ #define EXT4_MB_HINT_MERGE 1 /* blocks already reserved */ @@ -179,9 +199,6 @@ struct flex_groups { #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ -#ifdef __KERNEL__ -#include "ext4_sb.h" -#endif /* * Macro-instructions used to manage group descriptors */ @@ -297,10 +314,23 @@ struct ext4_new_group_data { }; /* - * Following is used by preallocation code to tell get_blocks() that we - * want uninitialzed extents. + * Flags used by ext4_get_blocks() */ -#define EXT4_CREATE_UNINITIALIZED_EXT 2 + /* Allocate any needed blocks and/or convert an unitialized + extent to be an initialized ext4 */ +#define EXT4_GET_BLOCKS_CREATE 0x0001 + /* Request the creation of an unitialized extent */ +#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002 +#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ + EXT4_GET_BLOCKS_CREATE) + /* Caller is from the delayed allocation writeout path, + so set the magic i_delalloc_reserve_flag after taking the + inode allocation semaphore for */ +#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 + /* Call ext4_da_update_reserve_space() after successfully + allocating the blocks */ +#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 + /* * ioctl commands @@ -516,6 +546,110 @@ do { \ #endif /* defined(__KERNEL__) || defined(__linux__) */ /* + * storage for cached extent + */ +struct ext4_ext_cache { + ext4_fsblk_t ec_start; + ext4_lblk_t ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; +}; + +/* + * fourth extended file system inode data in memory + */ +struct ext4_inode_info { + __le32 i_data[15]; /* unconverted */ + __u32 i_flags; + ext4_fsblk_t i_file_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is ued for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + ext4_group_t i_block_group; + __u32 i_state; /* Dynamic state flags for ext4 */ + + ext4_lblk_t i_dir_start_lookup; +#ifdef CONFIG_EXT4_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_mutex even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT4_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + + /* + * i_disksize keeps track of what the inode size is ON DISK, not + * in memory. During truncate, i_size is set to the new size by + * the VFS prior to calling ext4_truncate(), but the filesystem won't + * set i_disksize to 0 until the truncate is actually under way. + * + * The intent is that i_disksize always represents the blocks which + * are used by this file. This allows recovery to restart truncate + * on orphans if we crash during truncate. We actually write i_disksize + * into the on-disk inode when writing inodes out, instead of i_size. + * + * The only time when i_disksize and i_size may be different is when + * a truncate is in progress. The only things which change i_disksize + * are ext4_get_block (growth) and ext4_truncate (shrinkth). + */ + loff_t i_disksize; + + /* + * i_data_sem is for serialising ext4_truncate() against + * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's + * data tree are chopped off during truncate. We can't do that in + * ext4 because whenever we perform intermediate commits during + * truncate, the inode and all the metadata blocks *must* be in a + * consistent state which allows truncation of the orphans to restart + * during recovery. Hence we must fix the get_block-vs-truncate race + * by other means, so we have i_data_sem. + */ + struct rw_semaphore i_data_sem; + struct inode vfs_inode; + struct jbd2_inode jinode; + + struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; + + /* ialloc */ + ext4_group_t i_last_alloc_group; + + /* allocation reservation info for delalloc */ + unsigned int i_reserved_data_blocks; + unsigned int i_reserved_meta_blocks; + unsigned int i_allocated_meta_blocks; + unsigned short i_delalloc_reserved_flag; + + /* on-disk additional length */ + __u16 i_extra_isize; + + spinlock_t i_block_reservation_lock; +}; + +/* * File system states */ #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ @@ -560,6 +694,7 @@ do { \ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ +#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -689,6 +824,137 @@ struct ext4_super_block { }; #ifdef __KERNEL__ +/* + * fourth extended-fs super-block data in memory + */ +struct ext4_sb_info { + unsigned long s_desc_size; /* Size of a group descriptor in bytes */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + ext4_group_t s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ + struct buffer_head **s_group_desc; + unsigned long s_mount_opt; + ext4_fsblk_t s_sb_block; + uid_t s_resuid; + gid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + unsigned int s_inode_readahead_blks; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; + int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct percpu_counter s_dirtyblocks_counter; + struct blockgroup_lock *s_blockgroup_lock; + struct proc_dir_entry *s_proc; + struct kobject s_kobj; + struct completion s_kobj_unregister; + + /* Journaling */ + struct inode *s_journal_inode; + struct journal_s *s_journal; + struct list_head s_orphan; + struct mutex s_orphan_lock; + struct mutex s_resize_lock; + unsigned long s_commit_interval; + u32 s_max_batch_time; + u32 s_min_batch_time; + struct block_device *journal_bdev; +#ifdef CONFIG_JBD2_DEBUG + struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ +#endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ + struct rb_root system_blks; + +#ifdef EXTENTS_STATS + /* ext4 extents stats */ + unsigned long s_ext_min; + unsigned long s_ext_max; + unsigned long s_depth_max; + spinlock_t s_ext_stats_lock; + unsigned long s_ext_blocks; + unsigned long s_ext_extents; +#endif + + /* for buddy allocator */ + struct ext4_group_info ***s_group_info; + struct inode *s_buddy_cache; + long s_blocks_reserved; + spinlock_t s_reserve_lock; + spinlock_t s_md_lock; + tid_t s_last_transaction; + unsigned short *s_mb_offsets; + unsigned int *s_mb_maxs; + + /* tunables */ + unsigned long s_stripe; + unsigned int s_mb_stream_request; + unsigned int s_mb_max_to_scan; + unsigned int s_mb_min_to_scan; + unsigned int s_mb_stats; + unsigned int s_mb_order2_reqs; + unsigned int s_mb_group_prealloc; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; + + /* history to debug policy */ + struct ext4_mb_history *s_mb_history; + int s_mb_history_cur; + int s_mb_history_max; + int s_mb_history_num; + spinlock_t s_mb_history_lock; + int s_mb_history_filter; + + /* stats for buddy allocator */ + spinlock_t s_mb_pa_lock; + atomic_t s_bal_reqs; /* number of reqs with len > 1 */ + atomic_t s_bal_success; /* we found long enough chunks */ + atomic_t s_bal_allocated; /* in blocks */ + atomic_t s_bal_ex_scanned; /* total extents scanned */ + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ + spinlock_t s_bal_lock; + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; + atomic_t s_mb_lost_chunks; + atomic_t s_mb_preallocated; + atomic_t s_mb_discarded; + + /* locality groups */ + struct ext4_locality_group *s_locality_groups; + + /* for write statistics */ + unsigned long s_sectors_written_start; + u64 s_kbytes_written; + + unsigned int s_log_groups_per_flex; + struct flex_groups *s_flex_groups; +}; + static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) { return sb->s_fs_info; @@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode) current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; } - static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || @@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, struct buffer_head ** bh); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, + ext4_group_t block_group); +extern unsigned ext4_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, + ext4_group_t group, + struct ext4_group_desc *desc); +#define ext4_free_blocks_after_init(sb, group, desc) \ + ext4_init_block_bitmap(sb, NULL, group, desc) /* dir.c */ extern int ext4_check_dir_entry(const char *, struct inode *, @@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); extern unsigned long ext4_count_dirs(struct super_block *); extern void ext4_check_inodes_bitmap(struct super_block *); +extern unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, + ext4_group_t group, + struct ext4_group_desc *desc); +extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); /* mballoc.c */ extern long ext4_mb_stats; @@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void ext4_warning(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); +extern void ext4_msg(struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, const char *, const char *, ...) __attribute__ ((format (printf, 4, 5))); @@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); extern void ext4_itable_unused_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { @@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, return grp_info[indexv][indexh]; } +/* + * Reading s_groups_count requires using smp_rmb() afterwards. See + * the locking protocol documented in the comments of ext4_group_add() + * in resize.c + */ +static inline ext4_group_t ext4_get_groups_count(struct super_block *sb) +{ + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + + smp_rmb(); + return ngroups; +} static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, ext4_group_t block_group) @@ -1283,33 +1579,25 @@ struct ext4_group_info { }; #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 -#define EXT4_GROUP_INFO_LOCKED_BIT 1 #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) -static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) +static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, + ext4_group_t group) { - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); + return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); } -static inline void ext4_unlock_group(struct super_block *sb, - ext4_group_t group) +static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); + spin_lock(ext4_group_lock_ptr(sb, group)); } -static inline int ext4_is_group_locked(struct super_block *sb, +static inline void ext4_unlock_group(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, - &(grinfo->bb_state)); + spin_unlock(ext4_group_lock_ptr(sb, group)); } /* @@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations; /* namei.c */ extern const struct inode_operations ext4_dir_inode_operations; extern const struct inode_operations ext4_special_inode_operations; +extern struct dentry *ext4_get_parent(struct dentry *child); /* symlink.c */ extern const struct inode_operations ext4_symlink_inode_operations; extern const struct inode_operations ext4_fast_symlink_inode_operations; +/* block_validity */ +extern void ext4_release_system_zone(struct super_block *sb); +extern int ext4_setup_system_zone(struct super_block *sb); +extern int __init init_ext4_system_zone(void); +extern void exit_ext4_system_zone(void); +extern int ext4_data_block_valid(struct ext4_sb_info *sbi, + ext4_fsblk_t start_blk, + unsigned int count); + /* extents.c */ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); @@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk); extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, - struct buffer_head *bh_result, - int create, int extend_disksize); + struct buffer_head *bh_result, int flags); extern void ext4_ext_truncate(struct inode *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); -extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, - sector_t block, unsigned int max_blocks, - struct buffer_head *bh, int create, - int extend_disksize, int flag); +extern int ext4_get_blocks(handle_t *handle, struct inode *inode, + sector_t block, unsigned int max_blocks, + struct buffer_head *bh, int flags); extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h deleted file mode 100644 index 4ce2187123aa..000000000000 --- a/fs/ext4/ext4_i.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * ext4_i.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs_i.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _EXT4_I -#define _EXT4_I - -#include <linux/rwsem.h> -#include <linux/rbtree.h> -#include <linux/seqlock.h> -#include <linux/mutex.h> - -/* data type for block offset of block group */ -typedef int ext4_grpblk_t; - -/* data type for filesystem-wide blocks number */ -typedef unsigned long long ext4_fsblk_t; - -/* data type for file logical block number */ -typedef __u32 ext4_lblk_t; - -/* data type for block group number */ -typedef unsigned int ext4_group_t; - -/* - * storage for cached extent - */ -struct ext4_ext_cache { - ext4_fsblk_t ec_start; - ext4_lblk_t ec_block; - __u32 ec_len; /* must be 32bit to return holes */ - __u32 ec_type; -}; - -/* - * fourth extended file system inode data in memory - */ -struct ext4_inode_info { - __le32 i_data[15]; /* unconverted */ - __u32 i_flags; - ext4_fsblk_t i_file_acl; - __u32 i_dtime; - - /* - * i_block_group is the number of the block group which contains - * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to - * place a file's data blocks near its inode block, and new inodes - * near to their parent directory's inode. - */ - ext4_group_t i_block_group; - __u32 i_state; /* Dynamic state flags for ext4 */ - - ext4_lblk_t i_dir_start_lookup; -#ifdef CONFIG_EXT4_FS_XATTR - /* - * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention - * between readers of EAs and writers of regular file data, so - * instead we synchronize on xattr_sem when reading or changing - * EAs. - */ - struct rw_semaphore xattr_sem; -#endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - - struct list_head i_orphan; /* unlinked but open inodes */ - - /* - * i_disksize keeps track of what the inode size is ON DISK, not - * in memory. During truncate, i_size is set to the new size by - * the VFS prior to calling ext4_truncate(), but the filesystem won't - * set i_disksize to 0 until the truncate is actually under way. - * - * The intent is that i_disksize always represents the blocks which - * are used by this file. This allows recovery to restart truncate - * on orphans if we crash during truncate. We actually write i_disksize - * into the on-disk inode when writing inodes out, instead of i_size. - * - * The only time when i_disksize and i_size may be different is when - * a truncate is in progress. The only things which change i_disksize - * are ext4_get_block (growth) and ext4_truncate (shrinkth). - */ - loff_t i_disksize; - - /* - * i_data_sem is for serialising ext4_truncate() against - * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's - * data tree are chopped off during truncate. We can't do that in - * ext4 because whenever we perform intermediate commits during - * truncate, the inode and all the metadata blocks *must* be in a - * consistent state which allows truncation of the orphans to restart - * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have i_data_sem. - */ - struct rw_semaphore i_data_sem; - struct inode vfs_inode; - struct jbd2_inode jinode; - - struct ext4_ext_cache i_cached_extent; - /* - * File creation time. Its function is same as that of - * struct timespec i_{a,c,m}time in the generic inode. - */ - struct timespec i_crtime; - - /* mballoc */ - struct list_head i_prealloc_list; - spinlock_t i_prealloc_lock; - - /* ialloc */ - ext4_group_t i_last_alloc_group; - - /* allocation reservation info for delalloc */ - unsigned int i_reserved_data_blocks; - unsigned int i_reserved_meta_blocks; - unsigned int i_allocated_meta_blocks; - unsigned short i_delalloc_reserved_flag; - - /* on-disk additional length */ - __u16 i_extra_isize; - - spinlock_t i_block_reservation_lock; -}; - -#endif /* _EXT4_I */ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h deleted file mode 100644 index 57b71fefbccf..000000000000 --- a/fs/ext4/ext4_sb.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * ext4_sb.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs_sb.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _EXT4_SB -#define _EXT4_SB - -#ifdef __KERNEL__ -#include <linux/timer.h> -#include <linux/wait.h> -#include <linux/blockgroup_lock.h> -#include <linux/percpu_counter.h> -#endif -#include <linux/rbtree.h> - -/* - * fourth extended-fs super-block data in memory - */ -struct ext4_sb_info { - unsigned long s_desc_size; /* Size of a group descriptor in bytes */ - unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_blocks_per_group;/* Number of blocks in a group */ - unsigned long s_inodes_per_group;/* Number of inodes in a group */ - unsigned long s_itb_per_group; /* Number of inode table blocks per group */ - unsigned long s_gdb_count; /* Number of group descriptor blocks */ - unsigned long s_desc_per_block; /* Number of group descriptors per block */ - ext4_group_t s_groups_count; /* Number of groups in the fs */ - unsigned long s_overhead_last; /* Last calculated overhead */ - unsigned long s_blocks_last; /* Last seen block count */ - loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; - unsigned long s_mount_opt; - ext4_fsblk_t s_sb_block; - uid_t s_resuid; - gid_t s_resgid; - unsigned short s_mount_state; - unsigned short s_pad; - int s_addr_per_block_bits; - int s_desc_per_block_bits; - int s_inode_size; - int s_first_ino; - unsigned int s_inode_readahead_blks; - spinlock_t s_next_gen_lock; - u32 s_next_generation; - u32 s_hash_seed[4]; - int s_def_hash_version; - int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ - struct percpu_counter s_freeblocks_counter; - struct percpu_counter s_freeinodes_counter; - struct percpu_counter s_dirs_counter; - struct percpu_counter s_dirtyblocks_counter; - struct blockgroup_lock *s_blockgroup_lock; - struct proc_dir_entry *s_proc; - struct kobject s_kobj; - struct completion s_kobj_unregister; - - /* Journaling */ - struct inode *s_journal_inode; - struct journal_s *s_journal; - struct list_head s_orphan; - unsigned long s_commit_interval; - u32 s_max_batch_time; - u32 s_min_batch_time; - struct block_device *journal_bdev; -#ifdef CONFIG_JBD2_DEBUG - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ -#endif -#ifdef CONFIG_QUOTA - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ -#endif - unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ - -#ifdef EXTENTS_STATS - /* ext4 extents stats */ - unsigned long s_ext_min; - unsigned long s_ext_max; - unsigned long s_depth_max; - spinlock_t s_ext_stats_lock; - unsigned long s_ext_blocks; - unsigned long s_ext_extents; -#endif - - /* for buddy allocator */ - struct ext4_group_info ***s_group_info; - struct inode *s_buddy_cache; - long s_blocks_reserved; - spinlock_t s_reserve_lock; - spinlock_t s_md_lock; - tid_t s_last_transaction; - unsigned short *s_mb_offsets; - unsigned int *s_mb_maxs; - - /* tunables */ - unsigned long s_stripe; - unsigned int s_mb_stream_request; - unsigned int s_mb_max_to_scan; - unsigned int s_mb_min_to_scan; - unsigned int s_mb_stats; - unsigned int s_mb_order2_reqs; - unsigned int s_mb_group_prealloc; - /* where last allocation was done - for stream allocation */ - unsigned long s_mb_last_group; - unsigned long s_mb_last_start; - - /* history to debug policy */ - struct ext4_mb_history *s_mb_history; - int s_mb_history_cur; - int s_mb_history_max; - int s_mb_history_num; - spinlock_t s_mb_history_lock; - int s_mb_history_filter; - - /* stats for buddy allocator */ - spinlock_t s_mb_pa_lock; - atomic_t s_bal_reqs; /* number of reqs with len > 1 */ - atomic_t s_bal_success; /* we found long enough chunks */ - atomic_t s_bal_allocated; /* in blocks */ - atomic_t s_bal_ex_scanned; /* total extents scanned */ - atomic_t s_bal_goals; /* goal hits */ - atomic_t s_bal_breaks; /* too long searches */ - atomic_t s_bal_2orders; /* 2^order hits */ - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; - atomic_t s_mb_lost_chunks; - atomic_t s_mb_preallocated; - atomic_t s_mb_discarded; - - /* locality groups */ - struct ext4_locality_group *s_locality_groups; - - /* for write statistics */ - unsigned long s_sectors_written_start; - u64 s_kbytes_written; - - unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; -}; - -static inline spinlock_t * -sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group) -{ - return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); -} - -#endif /* _EXT4_SB */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e3a55eb8b26a..2593f748c3a4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth) static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) { - ext4_fsblk_t block = ext_pblock(ext), valid_block; + ext4_fsblk_t block = ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; - valid_block = le32_to_cpu(es->s_first_data_block) + - EXT4_SB(inode->i_sb)->s_gdb_count; - if (unlikely(block <= valid_block || - ((block + len) > ext4_blocks_count(es)))) - return 0; - else - return 1; + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } static int ext4_valid_extent_idx(struct inode *inode, struct ext4_extent_idx *ext_idx) { - ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + ext4_fsblk_t block = idx_pblock(ext_idx); - valid_block = le32_to_cpu(es->s_first_data_block) + - EXT4_SB(inode->i_sb)->s_gdb_count; - if (unlikely(block <= valid_block || - (block >= ext4_blocks_count(es)))) - return 0; - else - return 1; + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); } static int ext4_valid_extent_entries(struct inode *inode, @@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex = EXT_LAST_EXTENT(eh); ex_ee_block = le32_to_cpu(ex->ee_block); - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex_ee_len = ext4_ext_get_actual_len(ex); while (ex >= EXT_FIRST_EXTENT(eh) && ex_ee_block + ex_ee_len > start) { + + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + else + uninitialized = 0; + ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); path[depth].p_ext = ex; @@ -2784,7 +2774,7 @@ fix_extent_len: int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, struct buffer_head *bh_result, - int create, int extend_disksize) + int flags) { struct ext4_ext_path *path = NULL; struct ext4_extent_header *eh; @@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, int err = 0, depth, ret, cache_type; unsigned int allocated = 0; struct ext4_allocation_request ar; - loff_t disksize; __clear_bit(BH_New, &bh_result->b_state); ext_debug("blocks %u/%u requested for inode %u\n", @@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, cache_type = ext4_ext_in_cache(inode, iblock, &newex); if (cache_type) { if (cache_type == EXT4_EXT_CACHE_GAP) { - if (!create) { + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* * block isn't allocated yet and * user doesn't want to allocate it @@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, EXT4_EXT_CACHE_EXTENT); goto out; } - if (create == EXT4_CREATE_UNINITIALIZED_EXT) + if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) goto out; - if (!create) { + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { + if (allocated > max_blocks) + allocated = max_blocks; /* * We have blocks reserved already. We * return allocated blocks so that delalloc @@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * the buffer head will be unmapped so that * a read from the block returns 0s. */ - if (allocated > max_blocks) - allocated = max_blocks; set_buffer_unwritten(bh_result); bh_result->b_bdev = inode->i_sb->s_bdev; bh_result->b_blocknr = newblock; @@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * requested block isn't allocated yet; * we couldn't try to create block if create flag is zero */ - if (!create) { + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* * put just found gap into cache to speed up * subsequent requests @@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * EXT_UNINIT_MAX_LEN. */ if (max_blocks > EXT_INIT_MAX_LEN && - create != EXT4_CREATE_UNINITIALIZED_EXT) + !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) max_blocks = EXT_INIT_MAX_LEN; else if (max_blocks > EXT_UNINIT_MAX_LEN && - create == EXT4_CREATE_UNINITIALIZED_EXT) + (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) max_blocks = EXT_UNINIT_MAX_LEN; /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ @@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* try to insert new extent into found leaf and return */ ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(ar.len); - if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ + if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */ ext4_ext_mark_uninitialized(&newex); err = ext4_ext_insert_extent(handle, inode, path, &newex); if (err) { @@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); outnew: - if (extend_disksize) { - disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits; - if (disksize > i_size_read(inode)) - disksize = i_size_read(inode); - if (disksize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = disksize; - } - set_buffer_new(bh_result); /* Cache only when it is _not_ an uninitialized extent */ - if (create != EXT4_CREATE_UNINITIALIZED_EXT) + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) ext4_ext_put_in_cache(inode, iblock, allocated, newblock, EXT4_EXT_CACHE_EXTENT); out: @@ -3150,9 +3131,10 @@ retry: ret = PTR_ERR(handle); break; } - ret = ext4_get_blocks_wrap(handle, inode, block, - max_blocks, &map_bh, - EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); + map_bh.b_state = 0; + ret = ext4_get_blocks(handle, inode, block, + max_blocks, &map_bh, + EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); if (ret <= 0) { #ifdef EXT4FS_DEBUG WARN_ON(ret <= 0); @@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, void *data) { struct fiemap_extent_info *fieinfo = data; - unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; + unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; __u64 logical; __u64 physical; __u64 length; @@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, * * XXX this might miss a single-block extent at EXT_MAX_BLOCK */ - if (logical + length - 1 == EXT_MAX_BLOCK || - ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) + if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || + newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { + loff_t size = i_size_read(inode); + loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); + flags |= FIEMAP_EXTENT_LAST; + if ((flags & FIEMAP_EXTENT_DELALLOC) && + logical+length > size) + length = (size - logical + bs - 1) & ~(bs-1); + } error = fiemap_fill_next_extent(fieinfo, logical, physical, length, flags); @@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Walk the extent tree gathering extent information. * ext4_ext_fiemap_cb will push extents back to user. */ - down_write(&EXT4_I(inode)->i_data_sem); + down_read(&EXT4_I(inode)->i_data_sem); error = ext4_ext_walk_space(inode, start_blk, len_blks, ext4_ext_fiemap_cb, fieinfo); - up_write(&EXT4_I(inode)->i_data_sem); + up_read(&EXT4_I(inode)->i_data_sem); } return error; diff --git a/fs/ext4/group.h b/fs/ext4/group.h deleted file mode 100644 index c2c0a8d06d0e..000000000000 --- a/fs/ext4/group.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * linux/fs/ext4/group.h - * - * Copyright (C) 2007 Cluster File Systems, Inc - * - * Author: Andreas Dilger <adilger@clusterfs.com> - */ - -#ifndef _LINUX_EXT4_GROUP_H -#define _LINUX_EXT4_GROUP_H - -extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, - struct ext4_group_desc *gdp); -extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, - struct ext4_group_desc *gdp); -struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, - ext4_group_t block_group); -extern unsigned ext4_init_block_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, - struct ext4_group_desc *desc); -#define ext4_free_blocks_after_init(sb, group, desc) \ - ext4_init_block_bitmap(sb, NULL, group, desc) -extern unsigned ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, - struct ext4_group_desc *desc); -extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); -#endif /* _LINUX_EXT4_GROUP_H */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f18e0a08a6b5..3743bd849bce 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -27,7 +27,6 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" -#include "group.h" /* * ialloc.c contains the inodes allocation and deallocation routines @@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) unlock_buffer(bh); return bh; } - spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { ext4_init_inode_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); unlock_buffer(bh); return bh; } - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); if (buffer_uptodate(bh)) { /* * if not uninit if bh is uptodate, @@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) goto error_return; /* Ok, now we can actually update the inode bitmaps.. */ - spin_lock(sb_bgl_lock(sbi, block_group)); - cleared = ext4_clear_bit(bit, bitmap_bh->b_data); - spin_unlock(sb_bgl_lock(sbi, block_group)); + cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), + bit, bitmap_bh->b_data); if (!cleared) ext4_error(sb, "ext4_free_inode", "bit already cleared for inode %lu", ino); @@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) if (fatal) goto error_return; if (gdp) { - spin_lock(sb_bgl_lock(sbi, block_group)); + ext4_lock_group(sb, block_group); count = ext4_free_inodes_count(sb, gdp) + 1; ext4_free_inodes_set(sb, gdp, count); if (is_directory) { @@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) } gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); + ext4_unlock_group(sb, block_group); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) percpu_counter_dec(&sbi->s_dirs_counter); @@ -316,7 +314,7 @@ error_return: static int find_group_dir(struct super_block *sb, struct inode *parent, ext4_group_t *best_group) { - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); unsigned int freei, avefreei; struct ext4_group_desc *desc, *best_desc = NULL; ext4_group_t group; @@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *desc; - struct buffer_head *bh; struct flex_groups *flex_group = sbi->s_flex_groups; ext4_group_t parent_group = EXT4_I(parent)->i_block_group; ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); - ext4_group_t ngroups = sbi->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); int flex_size = ext4_flex_bg_size(sbi); ext4_group_t best_flex = parent_fbg_group; int blocks_per_flex = sbi->s_blocks_per_group * flex_size; @@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, ext4_group_t n_fbg_groups; ext4_group_t i; - n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> + n_fbg_groups = (ngroups + flex_size - 1) >> sbi->s_log_groups_per_flex; find_close_to_parent: @@ -404,7 +401,7 @@ find_close_to_parent: found_flexbg: for (i = best_flex * flex_size; i < ngroups && i < (best_flex + 1) * flex_size; i++) { - desc = ext4_get_group_desc(sb, i, &bh); + desc = ext4_get_group_desc(sb, i, NULL); if (ext4_free_inodes_count(sb, desc)) { *best_group = i; goto out; @@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_group_t ngroups = sbi->s_groups_count; + ext4_group_t real_ngroups = ext4_get_groups_count(sb); int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext4_fsblk_t freeb, avefreeb; unsigned int ndirs; int max_dirs, min_inodes; ext4_grpblk_t min_blocks; - ext4_group_t i, grp, g; + ext4_group_t i, grp, g, ngroups; struct ext4_group_desc *desc; struct orlov_stats stats; int flex_size = ext4_flex_bg_size(sbi); + ngroups = real_ngroups; if (flex_size > 1) { - ngroups = (ngroups + flex_size - 1) >> + ngroups = (real_ngroups + flex_size - 1) >> sbi->s_log_groups_per_flex; parent_group >>= sbi->s_log_groups_per_flex; } @@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, */ grp *= flex_size; for (i = 0; i < flex_size; i++) { - if (grp+i >= sbi->s_groups_count) + if (grp+i >= real_ngroups) break; desc = ext4_get_group_desc(sb, grp+i, NULL); if (desc && ext4_free_inodes_count(sb, desc)) { @@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, } fallback: - ngroups = sbi->s_groups_count; + ngroups = real_ngroups; avefreei = freei / ngroups; fallback_retry: parent_group = EXT4_I(parent)->i_block_group; @@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, ext4_group_t *group, int mode) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; - ext4_group_t i, last; int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); /* @@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent, /* * claim the inode from the inode bitmap. If the group - * is uninit we need to take the groups's sb_bgl_lock + * is uninit we need to take the groups's ext4_group_lock * and clear the uninit flag. The inode bitmap update * and group desc uninit flag clear should be done - * after holding sb_bgl_lock so that ext4_read_inode_bitmap + * after holding ext4_group_lock so that ext4_read_inode_bitmap * doesn't race with the ext4_claim_inode */ static int ext4_claim_inode(struct super_block *sb, @@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); - spin_lock(sb_bgl_lock(sbi, group)); + ext4_lock_group(sb, group); if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { /* not a free inode */ retval = 1; @@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb, ino++; if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || ino > EXT4_INODES_PER_GROUP(sb)) { - spin_unlock(sb_bgl_lock(sbi, group)); + ext4_unlock_group(sb, group); ext4_error(sb, __func__, "reserved inode or inode > inodes count - " "block_group = %u, inode=%lu", group, @@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb, } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); err_ret: - spin_unlock(sb_bgl_lock(sbi, group)); + ext4_unlock_group(sb, group); return retval; } @@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct super_block *sb; struct buffer_head *inode_bitmap_bh = NULL; struct buffer_head *group_desc_bh; - ext4_group_t group = 0; + ext4_group_t ngroups, group = 0; unsigned long ino = 0; struct inode *inode; struct ext4_group_desc *gdp = NULL; - struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; int ret2, err = 0; @@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) return ERR_PTR(-EPERM); sb = dir->i_sb; + ngroups = ext4_get_groups_count(sb); trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, dir->i_ino, mode); inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); ei = EXT4_I(inode); - sbi = EXT4_SB(sb); - es = sbi->s_es; if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { ret2 = find_group_flex(sb, dir, &group); @@ -856,7 +851,7 @@ got_group: if (ret2 == -1) goto out; - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { err = -EIO; gdp = ext4_get_group_desc(sb, group, &group_desc_bh); @@ -917,7 +912,7 @@ repeat_in_this_group: * group descriptor metadata has not yet been updated. * So we just go onto the next blockgroup. */ - if (++group == sbi->s_groups_count) + if (++group == ngroups) group = 0; } err = -ENOSPC; @@ -938,7 +933,7 @@ got: } free = 0; - spin_lock(sb_bgl_lock(sbi, group)); + ext4_lock_group(sb, group); /* recheck and clear flag under lock if we still need to */ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { free = ext4_free_blocks_after_init(sb, group, gdp); @@ -947,7 +942,7 @@ got: gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); } - spin_unlock(sb_bgl_lock(sbi, group)); + ext4_unlock_group(sb, group); /* Don't need to dirty bitmap block if we didn't change it */ if (free) { @@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) { unsigned long desc_count; struct ext4_group_desc *gdp; - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); #ifdef EXT4FS_DEBUG struct ext4_super_block *es; unsigned long bitmap_count, x; @@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) return desc_count; #else desc_count = 0; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) unsigned long ext4_count_dirs(struct super_block * sb) { unsigned long count = 0; - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a9ffd528dd1..875db944b22f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode, } static int __ext4_check_blockref(const char *function, struct inode *inode, - __le32 *p, unsigned int max) { - - unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); + __le32 *p, unsigned int max) +{ __le32 *bref = p; + unsigned int blk; + while (bref < p+max) { - if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { + blk = le32_to_cpu(*bref++); + if (blk && + unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), + blk, 1))) { ext4_error(inode->i_sb, function, - "block reference %u >= max (%u) " - "in inode #%lu, offset=%d", - le32_to_cpu(*bref), maxblocks, - inode->i_ino, (int)(bref-p)); + "invalid block reference %u " + "in inode #%lu", blk, inode->i_ino); return -EIO; } - bref++; } return 0; } @@ -892,6 +893,10 @@ err_out: } /* + * The ext4_ind_get_blocks() function handles non-extents inodes + * (i.e., using the traditional indirect/double-indirect i_blocks + * scheme) for ext4_get_blocks(). + * * Allocation strategy is simple: if we have to allocate something, we will * have to go the whole way to leaf. So let's do it before attaching anything * to tree, set linkage between the newborn blocks, write them if sync is @@ -909,15 +914,16 @@ err_out: * return = 0, if plain lookup failed. * return < 0, error case. * - * - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block - * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) + * The ext4_ind_get_blocks() function should be called with + * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem + * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or + * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system + * blocks. */ -static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, +static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int maxblocks, struct buffer_head *bh_result, - int create, int extend_disksize) + int flags) { int err = -EIO; ext4_lblk_t offsets[4]; @@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, int indirect_blks; int blocks_to_boundary = 0; int depth; - struct ext4_inode_info *ei = EXT4_I(inode); int count = 0; ext4_fsblk_t first_block = 0; - loff_t disksize; - J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); - J_ASSERT(handle != NULL || create == 0); + J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, iblock, offsets, &blocks_to_boundary); @@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, } /* Next simple case - plain lookup or failed read of indirect block */ - if (!create || err == -EIO) + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) goto cleanup; /* @@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, if (!err) err = ext4_splice_branch(handle, inode, iblock, partial, indirect_blks, count); - /* - * i_disksize growing is protected by i_data_sem. Don't forget to - * protect it if you're about to implement concurrent - * ext4_get_block() -bzzz - */ - if (!err && extend_disksize) { - disksize = ((loff_t) iblock + count) << inode->i_blkbits; - if (disksize > i_size_read(inode)) - disksize = i_size_read(inode); - if (disksize > ei->i_disksize) - ei->i_disksize = disksize; - } - if (err) + else goto cleanup; set_buffer_new(bh_result); @@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) ext4_discard_preallocations(inode); } +static int check_block_validity(struct inode *inode, sector_t logical, + sector_t phys, int len) +{ + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { + ext4_error(inode->i_sb, "check_block_validity", + "inode #%lu logical block %llu mapped to %llu " + "(size %d)", inode->i_ino, + (unsigned long long) logical, + (unsigned long long) phys, len); + WARN_ON(1); + return -EIO; + } + return 0; +} + /* - * The ext4_get_blocks_wrap() function try to look up the requested blocks, + * The ext4_get_blocks() function tries to look up the requested blocks, * and returns if the blocks are already mapped. * * Otherwise it takes the write lock of the i_data_sem and allocate blocks @@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * mapped. * * If file type is extents based, it will call ext4_ext_get_blocks(), - * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping + * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping * based files * * On success, it returns the number of blocks being mapped or allocate. @@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * * It returns the error in case of allocation failure. */ -int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, - unsigned int max_blocks, struct buffer_head *bh, - int create, int extend_disksize, int flag) +int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, + unsigned int max_blocks, struct buffer_head *bh, + int flags) { int retval; @@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, clear_buffer_unwritten(bh); /* - * Try to see if we can get the block without requesting - * for new file system block. + * Try to see if we can get the block without requesting a new + * file system block. */ down_read((&EXT4_I(inode)->i_data_sem)); if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, 0, 0); + bh, 0); } else { - retval = ext4_get_blocks_handle(handle, - inode, block, max_blocks, bh, 0, 0); + retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, + bh, 0); } up_read((&EXT4_I(inode)->i_data_sem)); + if (retval > 0 && buffer_mapped(bh)) { + int ret = check_block_validity(inode, block, + bh->b_blocknr, retval); + if (ret != 0) + return ret; + } + /* If it is only a block(s) look up */ - if (!create) + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) return retval; /* @@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, * let the underlying get_block() function know to * avoid double accounting */ - if (flag) + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 1; /* * We need to check for EXT4 here because migrate @@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, */ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, create, extend_disksize); + bh, flags); } else { - retval = ext4_get_blocks_handle(handle, inode, block, - max_blocks, bh, create, extend_disksize); + retval = ext4_ind_get_blocks(handle, inode, block, + max_blocks, bh, flags); if (retval > 0 && buffer_new(bh)) { /* @@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, } } - if (flag) { + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 0; - /* - * Update reserved blocks/metadata blocks - * after successful block allocation - * which were deferred till now - */ - if ((retval > 0) && buffer_delay(bh)) - ext4_da_update_reserve_space(inode, retval); - } + + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) + ext4_da_update_reserve_space(inode, retval); up_write((&EXT4_I(inode)->i_data_sem)); + if (retval > 0 && buffer_mapped(bh)) { + int ret = check_block_validity(inode, block, + bh->b_blocknr, retval); + if (ret != 0) + return ret; + } return retval; } @@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock, started = 1; } - ret = ext4_get_blocks_wrap(handle, inode, iblock, - max_blocks, bh_result, create, 0, 0); + ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, + create ? EXT4_GET_BLOCKS_CREATE : 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, { struct buffer_head dummy; int fatal = 0, err; + int flags = 0; J_ASSERT(handle != NULL || create == 0); dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); - err = ext4_get_blocks_wrap(handle, inode, block, 1, - &dummy, create, 1, 0); + if (create) + flags |= EXT4_GET_BLOCKS_CREATE; + err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); /* - * ext4_get_blocks_handle() returns number of blocks - * mapped. 0 in case of a HOLE. + * ext4_get_blocks() returns number of blocks mapped. 0 in + * case of a HOLE. */ if (err > 0) { if (err > 1) @@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; - int ret, needed_blocks = ext4_writepage_trans_blocks(inode); + int ret, needed_blocks; handle_t *handle; int retries = 0; struct page *page; @@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, "dev %s ino %lu pos %llu len %u flags %u", inode->i_sb->s_id, inode->i_ino, (unsigned long long) pos, len, flags); + /* + * Reserve one block more for addition to orphan list in case + * we allocate blocks but write fails for some reason + */ + needed_blocks = ext4_writepage_trans_blocks(inode) + 1; index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1483,15 +1508,30 @@ retry: if (ret) { unlock_page(page); - ext4_journal_stop(handle); page_cache_release(page); /* * block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold i_mutex. + * + * Add inode to orphan list in case we crash before + * truncate finishes */ if (pos + len > inode->i_size) + ext4_orphan_add(handle, inode); + + ext4_journal_stop(handle); + if (pos + len > inode->i_size) { vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might + * still be on the orphan list; we need to + * make sure the inode is removed from the + * orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) return ext4_handle_dirty_metadata(handle, NULL, bh); } +static int ext4_generic_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + int i_size_changed = 0; + struct inode *inode = mapping->host; + handle_t *handle = ext4_journal_current_handle(); + + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + /* + * No need to use i_size_read() here, the i_size + * cannot change under us because we hold i_mutex. + * + * But it's important to update i_size while still holding page lock: + * page writeout could otherwise come in and zero beyond i_size. + */ + if (pos + copied > inode->i_size) { + i_size_write(inode, pos + copied); + i_size_changed = 1; + } + + if (pos + copied > EXT4_I(inode)->i_disksize) { + /* We need to mark inode dirty even if + * new_i_size is less that inode->i_size + * bu greater than i_disksize.(hint delalloc) + */ + ext4_update_i_disksize(inode, (pos + copied)); + i_size_changed = 1; + } + unlock_page(page); + page_cache_release(page); + + /* + * Don't mark the inode dirty under page lock. First, it unnecessarily + * makes the holding time of page lock longer. Second, it forces lock + * ordering of page lock and transaction start for journaling + * filesystems. + */ + if (i_size_changed) + ext4_mark_inode_dirty(handle, inode); + + return copied; +} + /* * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from page_symlink(). @@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file, ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } - - ret2 = generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; + if (pos + len > inode->i_size) + /* if we have allocated more blocks and copied + * less. We will have blocks allocated outside + * inode->i_size. So truncate them + */ + ext4_orphan_add(handle, inode); if (ret2 < 0) ret = ret2; } @@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file, if (!ret) ret = ret2; + if (pos + len > inode->i_size) { + vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + + return ret ? ret : copied; } @@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file, handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; int ret = 0, ret2; - loff_t new_i_size; trace_mark(ext4_writeback_write_end, "dev %s ino %lu pos %llu len %u copied %u", inode->i_sb->s_id, inode->i_ino, (unsigned long long) pos, len, copied); - new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } - - ret2 = generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; + if (pos + len > inode->i_size) + /* if we have allocated more blocks and copied + * less. We will have blocks allocated outside + * inode->i_size. So truncate them + */ + ext4_orphan_add(handle, inode); + if (ret2 < 0) ret = ret2; @@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file, if (!ret) ret = ret2; + if (pos + len > inode->i_size) { + vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + return ret ? ret : copied; } @@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file, } unlock_page(page); + page_cache_release(page); + if (pos + len > inode->i_size) + /* if we have allocated more blocks and copied + * less. We will have blocks allocated outside + * inode->i_size. So truncate them + */ + ext4_orphan_add(handle, inode); + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - page_cache_release(page); + if (pos + len > inode->i_size) { + vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } return ret ? ret : copied; } @@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) * @logical - first logical block to start assignment with * * the function goes through all passed space and put actual disk - * block numbers into buffer heads, dropping BH_Delay + * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten */ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, struct buffer_head *exbh) @@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, do { if (cur_logical >= logical + blocks) break; - if (buffer_delay(bh)) { - bh->b_blocknr = pblock; - clear_buffer_delay(bh); - bh->b_bdev = inode->i_sb->s_bdev; - } else if (buffer_unwritten(bh)) { - bh->b_blocknr = pblock; - clear_buffer_unwritten(bh); - set_buffer_mapped(bh); - set_buffer_new(bh); - bh->b_bdev = inode->i_sb->s_bdev; + + if (buffer_delay(bh) || + buffer_unwritten(bh)) { + + BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); + + if (buffer_delay(bh)) { + clear_buffer_delay(bh); + bh->b_blocknr = pblock; + } else { + /* + * unwritten already should have + * blocknr assigned. Verify that + */ + clear_buffer_unwritten(bh); + BUG_ON(bh->b_blocknr != pblock); + } + } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); @@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode) return; } -#define EXT4_DELALLOC_RSVED 1 -static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - int ret; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - loff_t disksize = EXT4_I(inode)->i_disksize; - handle_t *handle = NULL; - - handle = ext4_journal_current_handle(); - BUG_ON(!handle); - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0, EXT4_DELALLOC_RSVED); - if (ret <= 0) - return ret; - - bh_result->b_size = (ret << inode->i_blkbits); - - if (ext4_should_order_data(inode)) { - int retval; - retval = ext4_jbd2_file_inode(handle, inode); - if (retval) - /* - * Failed to add inode for ordered mode. Don't - * update file size - */ - return retval; - } - - /* - * Update on-disk size along with block allocation we don't - * use 'extend_disksize' as size may change within already - * allocated block -bzzz - */ - disksize = ((loff_t) iblock + ret) << inode->i_blkbits; - if (disksize > i_size_read(inode)) - disksize = i_size_read(inode); - if (disksize > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, disksize); - ret = ext4_mark_inode_dirty(handle, inode); - return ret; - } - return 0; -} - /* * mpage_da_map_blocks - go through given space * @@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, */ static int mpage_da_map_blocks(struct mpage_da_data *mpd) { - int err = 0; + int err, blks, get_blocks_flags; struct buffer_head new; - sector_t next; + sector_t next = mpd->b_blocknr; + unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; + loff_t disksize = EXT4_I(mpd->inode)->i_disksize; + handle_t *handle = NULL; /* * We consider only non-mapped and non-allocated blocks */ if ((mpd->b_state & (1 << BH_Mapped)) && - !(mpd->b_state & (1 << BH_Delay))) + !(mpd->b_state & (1 << BH_Delay)) && + !(mpd->b_state & (1 << BH_Unwritten))) return 0; - new.b_state = mpd->b_state; - new.b_blocknr = 0; - new.b_size = mpd->b_size; - next = mpd->b_blocknr; + /* - * If we didn't accumulate anything - * to write simply return + * If we didn't accumulate anything to write simply return */ - if (!new.b_size) + if (!mpd->b_size) return 0; - err = ext4_da_get_block_write(mpd->inode, next, &new, 1); - if (err) { + handle = ext4_journal_current_handle(); + BUG_ON(!handle); + + /* + * Call ext4_get_blocks() to allocate any delayed allocation + * blocks, or to convert an uninitialized extent to be + * initialized (in the case where we have written into + * one or more preallocated blocks). + * + * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to + * indicate that we are on the delayed allocation path. This + * affects functions in many different parts of the allocation + * call path. This flag exists primarily because we don't + * want to change *many* call functions, so ext4_get_blocks() + * will set the magic i_delalloc_reserved_flag once the + * inode's allocation semaphore is taken. + * + * If the blocks in questions were delalloc blocks, set + * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting + * variables are updated after the blocks have been allocated. + */ + new.b_state = 0; + get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | + EXT4_GET_BLOCKS_DELALLOC_RESERVE); + if (mpd->b_state & (1 << BH_Delay)) + get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; + blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, + &new, get_blocks_flags); + if (blks < 0) { + err = blks; /* * If get block returns with error we simply * return. Later writepage will redirty the page and @@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) if (err == -ENOSPC) { ext4_print_free_blocks(mpd->inode); } - /* invlaidate all the pages */ + /* invalidate all the pages */ ext4_da_block_invalidatepages(mpd, next, mpd->b_size >> mpd->inode->i_blkbits); return err; } - BUG_ON(new.b_size == 0); + BUG_ON(blks == 0); + + new.b_size = (blks << mpd->inode->i_blkbits); if (buffer_new(&new)) __unmap_underlying_blocks(mpd->inode, &new); @@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) (mpd->b_state & (1 << BH_Unwritten))) mpage_put_bnr_to_bhs(mpd, next, &new); + if (ext4_should_order_data(mpd->inode)) { + err = ext4_jbd2_file_inode(handle, mpd->inode); + if (err) + return err; + } + + /* + * Update on-disk size along with block allocation. + */ + disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; + if (disksize > i_size_read(mpd->inode)) + disksize = i_size_read(mpd->inode); + if (disksize > EXT4_I(mpd->inode)->i_disksize) { + ext4_update_i_disksize(mpd->inode, disksize); + return ext4_mark_inode_dirty(handle, mpd->inode); + } + return 0; } @@ -2192,6 +2318,17 @@ flush_it: return; } +static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) +{ + /* + * unmapped buffer is possible for holes. + * delay buffer is possible with delayed allocation. + * We also need to consider unwritten buffer as unmapped. + */ + return (!buffer_mapped(bh) || buffer_delay(bh) || + buffer_unwritten(bh)) && buffer_dirty(bh); +} + /* * __mpage_da_writepage - finds extent of pages and blocks * @@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page, * Otherwise we won't make progress * with the page in ext4_da_writepage */ - if (buffer_dirty(bh) && - (!buffer_mapped(bh) || buffer_delay(bh))) { + if (ext4_bh_unmapped_or_delay(NULL, bh)) { mpage_add_bh_to_extent(mpd, logical, bh->b_size, bh->b_state); @@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page, } /* - * this is a special callback for ->write_begin() only - * it's intention is to return mapped block or reserve space + * This is a special get_blocks_t callback which is used by + * ext4_da_write_begin(). It will either return mapped block or + * reserve space for a single block. + * + * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. + * We also have b_blocknr = -1 and b_bdev initialized properly + * + * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. + * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev + * initialized properly. */ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * preallocated blocks are unmapped but should treated * the same as allocated blocks. */ - ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); + ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); if ((ret == 0) && !buffer_delay(bh_result)) { /* the block isn't (pre)allocated yet, let's reserve space */ /* @@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, set_buffer_delay(bh_result); } else if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); - /* - * With sub-block writes into unwritten extents - * we also need to mark the buffer as new so that - * the unwritten parts of the buffer gets correctly zeroed. - */ - if (buffer_unwritten(bh_result)) + if (buffer_unwritten(bh_result)) { + /* A delayed write to unwritten bh should + * be marked new and mapped. Mapped ensures + * that we don't do get_block multiple times + * when we write to the same offset and new + * ensures that we do proper zero out for + * partial write. + */ set_buffer_new(bh_result); + set_buffer_mapped(bh_result); + } ret = 0; } return ret; } -static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) -{ - /* - * unmapped buffer is possible for holes. - * delay buffer is possible with delayed allocation - */ - return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); -} - -static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, +/* + * This function is used as a standard get_block_t calback function + * when there is no desire to allocate any blocks. It is used as a + * callback function for block_prepare_write(), nobh_writepage(), and + * block_write_full_page(). These functions should only try to map a + * single block at a time. + * + * Since this function doesn't do block allocations even if the caller + * requests it by passing in create=1, it is critically important that + * any caller checks to make sure that any buffer heads are returned + * by this function are either all already mapped or marked for + * delayed allocation before calling nobh_writepage() or + * block_write_full_page(). Otherwise, b_blocknr could be left + * unitialized, and the page write functions will be taken by + * surprise. + */ +static int noalloc_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); + /* * we don't want to do block allocation in writepage * so call get_block_wrap with create = 0 */ - ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, - bh_result, 0, 0, 0); + ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); + BUG_ON(create && ret == 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, } /* - * get called vi ext4_da_writepages after taking page lock (have journal handle) - * get called via journal_submit_inode_data_buffers (no journal handle) - * get called via shrink_page_list via pdflush (no journal handle) - * or grab_page_cache when doing write_begin (have journal handle) + * This function can get called via... + * - ext4_da_writepages after taking page lock (have journal handle) + * - journal_submit_inode_data_buffers (no journal handle) + * - shrink_page_list via pdflush (no journal handle) + * - grab_page_cache when doing write_begin (have journal handle) */ static int ext4_da_writepage(struct page *page, struct writeback_control *wbc) @@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page, * do block allocation here. */ ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - ext4_normal_get_block_write); + noalloc_get_block_write); if (!ret) { page_bufs = page_buffers(page); /* check whether all are mapped and non delay */ @@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page, } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); + ret = nobh_writepage(page, noalloc_get_block_write, wbc); else - ret = block_write_full_page(page, - ext4_normal_get_block_write, - wbc); + ret = block_write_full_page(page, noalloc_get_block_write, + wbc); return ret; } @@ -2777,7 +2934,7 @@ retry: *pagep = page; ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_da_get_block_prep); + ext4_da_get_block_prep); if (ret < 0) { unlock_page(page); ext4_journal_stop(handle); @@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, for (i = 0; i < idx; i++) bh = bh->b_this_page; - if (!buffer_mapped(bh) || (buffer_delay(bh))) + if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) return 0; return 1; } @@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page, struct inode *inode = page->mapping->host; if (test_opt(inode->i_sb, NOBH)) - return nobh_writepage(page, - ext4_normal_get_block_write, wbc); + return nobh_writepage(page, noalloc_get_block_write, wbc); else - return block_write_full_page(page, - ext4_normal_get_block_write, - wbc); + return block_write_full_page(page, noalloc_get_block_write, + wbc); } static int ext4_normal_writepage(struct page *page, @@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page, int err; ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - ext4_normal_get_block_write); + noalloc_get_block_write); if (ret != 0) goto out_unlock; @@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page, * really know unless we go poke around in the buffer_heads. * But block_write_full_page will do the right thing. */ - return block_write_full_page(page, - ext4_normal_get_block_write, - wbc); + return block_write_full_page(page, noalloc_get_block_write, + wbc); } no_write: redirty_page_for_writepage(wbc, page); @@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return; - if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) + if (ei->i_disksize && inode->i_size == 0 && + !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { @@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait) return ext4_force_commit(inode->i_sb); } -int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh) -{ - int err = 0; - - mark_buffer_dirty(bh); - if (inode && inode_needs_sync(inode)) { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) { - ext4_error(inode->i_sb, __func__, - "IO error syncing inode, " - "inode=%lu, block=%llu", - inode->i_ino, - (unsigned long long)bh->b_blocknr); - err = -EIO; - } - } - return err; -} - /* * ext4_setattr() * @@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) */ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) { - int groups, gdpblocks; + ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); + int gdpblocks; int idxblocks; int ret = 0; @@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) groups += nrblocks; gdpblocks = groups; - if (groups > EXT4_SB(inode->i_sb)->s_groups_count) - groups = EXT4_SB(inode->i_sb)->s_groups_count; + if (groups > ngroups) + groups = ngroups; if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; @@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) * Calculate the journal credits for a chunk of data modification. * * This is called from DIO, fallocate or whoever calling - * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. + * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. * * journal buffers for data blocks are not included here, as DIO * and fallocate do no need to journal data buffers. diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f871677a7984..ed8482e22c0e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr) ext4_set_bit(bit, addr); } -static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) -{ - addr = mb_correct_addr_and_bit(&bit, addr); - ext4_set_bit_atomic(lock, bit, addr); -} - static inline void mb_clear_bit(int bit, void *addr) { addr = mb_correct_addr_and_bit(&bit, addr); ext4_clear_bit(bit, addr); } -static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) -{ - addr = mb_correct_addr_and_bit(&bit, addr); - ext4_clear_bit_atomic(lock, bit, addr); -} - static inline int mb_find_next_zero_bit(void *addr, int max, int start) { int fix = 0, ret, tmpmax; @@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, if (unlikely(e4b->bd_info->bb_bitmap == NULL)) return; - BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); for (i = 0; i < count; i++) { if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { ext4_fsblk_t blocknr; @@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) if (unlikely(e4b->bd_info->bb_bitmap == NULL)) return; - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); for (i = 0; i < count; i++) { BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); mb_set_bit(first + i, e4b->bd_info->bb_bitmap); @@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, static int ext4_mb_init_cache(struct page *page, char *incore) { + ext4_group_t ngroups; int blocksize; int blocks_per_page; int groups_per_page; @@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) inode = page->mapping->host; sb = inode->i_sb; + ngroups = ext4_get_groups_count(sb); blocksize = 1 << inode->i_blkbits; blocks_per_page = PAGE_CACHE_SIZE / blocksize; @@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) for (i = 0; i < groups_per_page; i++) { struct ext4_group_desc *desc; - if (first_group + i >= EXT4_SB(sb)->s_groups_count) + if (first_group + i >= ngroups) break; err = -EIO; @@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore) unlock_buffer(bh[i]); continue; } - spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + ext4_lock_group(sb, first_group + i); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh[i], first_group + i, desc); set_bitmap_uptodate(bh[i]); set_buffer_uptodate(bh[i]); - spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + ext4_unlock_group(sb, first_group + i); unlock_buffer(bh[i]); continue; } - spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + ext4_unlock_group(sb, first_group + i); if (buffer_uptodate(bh[i])) { /* * if not uninit if bh is uptodate, @@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) struct ext4_group_info *grinfo; group = (first_block + i) >> 1; - if (group >= EXT4_SB(sb)->s_groups_count) + if (group >= ngroups) break; /* @@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) return 0; } -static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) +static void mb_clear_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) cur += 32; continue; } - if (lock) - mb_clear_bit_atomic(lock, cur, bm); - else - mb_clear_bit(cur, bm); + mb_clear_bit(cur, bm); cur++; } } -static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) +static void mb_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) cur += 32; continue; } - if (lock) - mb_set_bit_atomic(lock, cur, bm); - else - mb_set_bit(cur, bm); + mb_set_bit(cur, bm); cur++; } } @@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct super_block *sb = e4b->bd_sb; BUG_ON(first + count > (sb->s_blocksize << 3)); - BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); @@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, int ord; void *buddy; - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); BUG_ON(ex == NULL); buddy = mb_find_buddy(e4b, order, &max); @@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); BUG_ON(e4b->bd_group != ex->fe_group); - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); mb_check_buddy(e4b); mb_mark_used_double(e4b, start, len); @@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) e4b->bd_info->bb_counters[ord]++; } - mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), - EXT4_MB_BITMAP(e4b), ex->fe_start, len0); + mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, unsigned free, fragments; unsigned i, bits; int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); - struct ext4_group_desc *desc; struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); BUG_ON(cr < 0 || cr >= 4); @@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, switch (cr) { case 0: BUG_ON(ac->ac_2order == 0); - /* If this group is uninitialized, skip it initially */ - desc = ext4_get_group_desc(ac->ac_sb, group, NULL); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) - return 0; /* Avoid using the first bg of a flexgroup for data files */ if ((ac->ac_flags & EXT4_MB_HINT_DATA) && @@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) int block, pnum; int blocks_per_page; int groups_per_page; + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t first_group; struct ext4_group_info *grp; @@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) /* read all groups the page covers into the cache */ for (i = 0; i < groups_per_page; i++) { - if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) + if ((first_group + i) >= ngroups) break; grp = ext4_get_group_info(sb, first_group + i); /* take all groups write allocation @@ -1945,8 +1924,7 @@ err: static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { - ext4_group_t group; - ext4_group_t i; + ext4_group_t ngroups, group, i; int cr; int err = 0; int bsbits; @@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) sb = ac->ac_sb; sbi = EXT4_SB(sb); + ngroups = ext4_get_groups_count(sb); BUG_ON(ac->ac_status == AC_STATUS_FOUND); /* first, try the goal */ @@ -2017,11 +1996,11 @@ repeat: */ group = ac->ac_g_ex.fe_group; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { + for (i = 0; i < ngroups; group++, i++) { struct ext4_group_info *grp; struct ext4_group_desc *desc; - if (group == EXT4_SB(sb)->s_groups_count) + if (group == ngroups) group = 0; /* quick check to skip empty groups */ @@ -2064,9 +2043,7 @@ repeat: ac->ac_groups_scanned++; desc = ext4_get_group_desc(sb, group, NULL); - if (cr == 0 || (desc->bg_flags & - cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && - ac->ac_2order != 0)) + if (cr == 0) ext4_mb_simple_scan_group(ac, &e4b); else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) @@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = { static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; - if (*pos < 0 || *pos >= sbi->s_groups_count) + if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; - group = *pos + 1; return (void *) ((unsigned long) group); } @@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ++*pos; - if (*pos < 0 || *pos >= sbi->s_groups_count) + if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; group = *pos + 1; return (void *) ((unsigned long) group); @@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb) if (sbi->s_proc != NULL) { remove_proc_entry("mb_groups", sbi->s_proc); - remove_proc_entry("mb_history", sbi->s_proc); + if (sbi->s_mb_history_max) + remove_proc_entry("mb_history", sbi->s_proc); } kfree(sbi->s_mb_history); } @@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb) int i; if (sbi->s_proc != NULL) { - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_history_fops, sb); + if (sbi->s_mb_history_max) + proc_create_data("mb_history", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_history_fops, sb); proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); } - sbi->s_mb_history_max = 1000; sbi->s_mb_history_cur = 0; spin_lock_init(&sbi->s_mb_history_lock); i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); - sbi->s_mb_history = kzalloc(i, GFP_KERNEL); + sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; /* if we can't allocate history, then we simple won't use it */ } @@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_mb_history h; - if (unlikely(sbi->s_mb_history == NULL)) + if (sbi->s_mb_history == NULL) return; if (!(ac->ac_op & sbi->s_mb_history_filter)) @@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) static int ext4_mb_init_backend(struct super_block *sb) { + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int metalen; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_group_desc *desc; /* This is the number of blocks used by GDT */ - num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - + num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); /* @@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb) for (i = 0; i < num_meta_group_infos; i++) { if ((i + 1) == num_meta_group_infos) metalen = sizeof(*meta_group_info) * - (sbi->s_groups_count - + (ngroups - (i << EXT4_DESC_PER_BLOCK_BITS(sb))); meta_group_info = kmalloc(metalen, GFP_KERNEL); if (meta_group_info == NULL) { @@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb) sbi->s_group_info[i] = meta_group_info; } - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { printk(KERN_ERR @@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) return 0; } -/* need to called with ext4 group lock (ext4_lock_group) */ +/* need to called with the ext4 group lock held */ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) { struct ext4_prealloc_space *pa; @@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) int ext4_mb_release(struct super_block *sb) { + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; struct ext4_group_info *grinfo; struct ext4_sb_info *sbi = EXT4_SB(sb); if (sbi->s_group_info) { - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); #ifdef DOUBLE_CHECK kfree(grinfo->bb_bitmap); @@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb) ext4_unlock_group(sb, i); kfree(grinfo); } - num_meta_group_infos = (sbi->s_groups_count + + num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); for (i = 0; i < num_meta_group_infos; i++) @@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + le32_to_cpu(es->s_first_data_block); len = ac->ac_b_ex.fe_len; - if (in_range(ext4_block_bitmap(sb, gdp), block, len) || - in_range(ext4_inode_bitmap(sb, gdp), block, len) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group) || - in_range(block + len - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { + if (!ext4_data_block_valid(sbi, block, len)) { ext4_error(sb, __func__, - "Allocating block %llu in system zone of %d group\n", - block, ac->ac_b_ex.fe_group); + "Allocating blocks %llu-%llu which overlap " + "fs metadata\n", block, block+len); /* File system mounted not to panic on error * Fix the bitmap and repeat the block allocation * We leak some of the blocks here. */ - mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), - bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); + ext4_lock_group(sb, ac->ac_b_ex.fe_group); + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); + ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) err = -EAGAIN; goto out_err; } + + ext4_lock_group(sb, ac->ac_b_ex.fe_group); #ifdef AGGRESSIVE_CHECK { int i; @@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - mb_set_bits(NULL, bitmap_bh->b_data, - ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_blks_set(sb, gdp, @@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; ext4_free_blks_set(sb, gdp, len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); - spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + + ext4_unlock_group(sb, ac->ac_b_ex.fe_group); percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); /* * Now reduce the dirty block count also. Should not go negative @@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) * the function goes through all block freed in the group * but not yet committed and marks them used in in-core bitmap. * buddy must be generated from this bitmap - * Need to be called with ext4 group lock (ext4_lock_group) + * Need to be called with the ext4 group lock held */ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group) @@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, node); - mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), - bitmap, entry->start_blk, - entry->count); + mb_set_bits(bitmap, entry->start_blk, entry->count); n = rb_next(n); } return; @@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, /* * the function goes through all preallocation in this group and marks them * used in in-core bitmap. buddy must be generated from this bitmap - * Need to be called with ext4 group lock (ext4_lock_group) + * Need to be called with ext4 group lock held */ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group) @@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), - bitmap, start, len); + mb_set_bits(bitmap, start, len); preallocated += len; count++; } @@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, static void ext4_mb_show_ac(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; - ext4_group_t i; + ext4_group_t ngroups, i; printk(KERN_ERR "EXT4-fs: Can't allocate:" " Allocation context details:\n"); @@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, ac->ac_found); printk(KERN_ERR "EXT4-fs: groups: \n"); - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + ngroups = ext4_get_groups_count(sb); + for (i = 0; i < ngroups; i++) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); struct ext4_prealloc_space *pa; ext4_grpblk_t start; @@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; int freed = 0; trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", sb->s_id, needed); - for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { + for (i = 0; i < ngroups && needed > 0; i++) { ret = ext4_mb_discard_group_preallocations(sb, i, needed); freed += ret; needed -= ret; @@ -4859,29 +4831,25 @@ do_more: new_entry->group = block_group; new_entry->count = count; new_entry->t_tid = handle->h_transaction->t_tid; + ext4_lock_group(sb, block_group); - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, - bit, count); + mb_clear_bits(bitmap_bh->b_data, bit, count); ext4_mb_free_metadata(handle, &e4b, new_entry); - ext4_unlock_group(sb, block_group); } else { - ext4_lock_group(sb, block_group); /* need to update group_info->bb_free and bitmap * with group lock held. generate_buddy look at * them with group lock_held */ - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, - bit, count); + ext4_lock_group(sb, block_group); + mb_clear_bits(bitmap_bh->b_data, bit, count); mb_free_blocks(inode, &e4b, bit, count); ext4_mb_return_to_preallocation(inode, &e4b, block, count); - ext4_unlock_group(sb, block_group); } - spin_lock(sb_bgl_lock(sbi, block_group)); ret = ext4_free_blks_count(sb, gdp) + count; ext4_free_blks_set(sb, gdp, ret); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); + ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeblocks_counter, count); if (sbi->s_log_groups_per_flex) { diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index dd9e6cd5f6cf..75e34f69215b 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -23,7 +23,6 @@ #include <linux/mutex.h> #include "ext4_jbd2.h" #include "ext4.h" -#include "group.h" /* * with AGGRESSIVE_CHECK allocator runs consistency checks over diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 22098e1cd085..07eb6649e4fa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -37,7 +37,6 @@ #include "ext4.h" #include "ext4_jbd2.h" -#include "namei.h" #include "xattr.h" #include "acl.h" @@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, ext4fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u16) ((char *) de - base); + map_tail->offs = ((char *) de - base)>>2; map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); @@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, unsigned rec_len = 0; while (count--) { - struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) + (from + (map->offs<<2)); rec_len = EXT4_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = @@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) if (!ext4_handle_valid(handle)) return 0; - lock_super(sb); + mutex_lock(&EXT4_SB(sb)->s_orphan_lock); if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; @@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* @@@ FIXME: Observation from aviro: * I think I can trigger J_ASSERT in ext4_orphan_add(). We block - * here (on lock_super()), so race with ext4_link() which might bump + * here (on s_orphan_lock), so race with ext4_link() which might bump * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. + * + * tytso, 4/25/2009: I'm not sure how that could happen; + * shouldn't the fs core protect us from these sort of + * unlink()/link() races? */ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); @@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); out_unlock: - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); ext4_std_error(inode->i_sb, err); return err; } @@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (!ext4_handle_valid(handle)) return 0; - lock_super(inode->i_sb); - if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } + mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); + if (list_empty(&ei->i_orphan)) + goto out; ino_next = NEXT_ORPHAN(inode); prev = ei->i_orphan.prev; @@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) out_err: ext4_std_error(inode->i_sb, err); out: - unlock_super(inode->i_sb); + mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); return err; out_brelse: @@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = { .removexattr = generic_removexattr, #endif .permission = ext4_permission, + .fiemap = ext4_fiemap, }; const struct inode_operations ext4_special_inode_operations = { diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h deleted file mode 100644 index 5e4dfff36a00..000000000000 --- a/fs/ext4/namei.h +++ /dev/null @@ -1,8 +0,0 @@ -/* linux/fs/ext4/namei.h - * - * Copyright (C) 2005 Simtec Electronics - * Ben Dooks <ben@simtec.co.uk> - * -*/ - -extern struct dentry *ext4_get_parent(struct dentry *child); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 546c7dd869e1..27eb289eea37 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -15,7 +15,6 @@ #include <linux/slab.h> #include "ext4_jbd2.h" -#include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb, if (IS_ERR(handle)) return PTR_ERR(handle); - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { err = -EBUSY; goto exit_journal; @@ -302,7 +301,7 @@ exit_bh: brelse(bh); exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -643,11 +642,12 @@ exit_free: * important part is that the new block and inode counts are in the backup * superblocks, and the location of the new group metadata in the GDT backups. * - * We do not need lock_super() for this, because these blocks are not - * otherwise touched by the filesystem code when it is mounted. We don't - * need to worry about last changing from sbi->s_groups_count, because the - * worst that can happen is that we do not copy the full number of backups - * at this time. The resize which changed s_groups_count will backup again. + * We do not need take the s_resize_lock for this, because these + * blocks are not otherwise touched by the filesystem code when it is + * mounted. We don't need to worry about last changing from + * sbi->s_groups_count, because the worst that can happen is that we + * do not copy the full number of backups at this time. The resize + * which changed s_groups_count will backup again. */ static void update_backups(struct super_block *sb, int blk_off, char *data, int size) @@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) goto exit_put; } - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); @@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* * OK, now we've set up the new group. Time to make it active. * - * Current kernels don't lock all allocations via lock_super(), + * We do not lock all allocations via s_resize_lock * so we have to be safe wrt. concurrent accesses the group * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. @@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * * The precise rules we use are: * - * * Writers of s_groups_count *must* hold lock_super + * * Writers of s_groups_count *must* hold s_resize_lock * AND * * Writers must perform a smp_wmb() after updating all dependent * data and before modifying the groups count * - * * Readers must hold lock_super() over the access + * * Readers must hold s_resize_lock over the access * OR * * Readers must perform an smp_rmb() after reading the groups count * and before reading any dependent data. @@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) sb->s_dirt = 1; exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; if (!err) { @@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after - * taking lock_super() below. */ + * taking the s_resize_lock below. */ o_blocks_count = ext4_blocks_count(es); o_groups_count = EXT4_SB(sb)->s_groups_count; @@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } - lock_super(sb); + mutex_lock(&EXT4_SB(sb)->s_resize_lock); if (o_blocks_count != ext4_blocks_count(es)) { ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); err = -EBUSY; goto exit_put; @@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, EXT4_SB(sb)->s_sbh))) { ext4_warning(sb, __func__, "error %d on journal write access", err); - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2958f4e6f222..c191d0f65fed 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -20,6 +20,7 @@ #include <linux/string.h> #include <linux/fs.h> #include <linux/time.h> +#include <linux/vmalloc.h> #include <linux/jbd2.h> #include <linux/slab.h> #include <linux/init.h> @@ -45,16 +46,20 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" -#include "namei.h" -#include "group.h" + +static int default_mb_history_length = 1000; + +module_param_named(default_mb_history_length, default_mb_history_length, + int, 0644); +MODULE_PARM_DESC(default_mb_history_length, + "Default number of entries saved for mb_history"); struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); -static int ext4_commit_super(struct super_block *sb, - struct ext4_super_block *es, int sync); +static int ext4_commit_super(struct super_block *sb, int sync); static void ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); static void ext4_clear_journal_err(struct super_block *sb, @@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, { return le32_to_cpu(bg->bg_block_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, @@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, { return le32_to_cpu(bg->bg_inode_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_table(struct super_block *sb, @@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, { return le32_to_cpu(bg->bg_inode_table_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); } __u32 ext4_free_blks_count(struct super_block *sb, @@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb, { return le16_to_cpu(bg->bg_free_blocks_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); } __u32 ext4_free_inodes_count(struct super_block *sb, @@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb, { return le16_to_cpu(bg->bg_free_inodes_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); } __u32 ext4_used_dirs_count(struct super_block *sb, @@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb, { return le16_to_cpu(bg->bg_used_dirs_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); } __u32 ext4_itable_unused_count(struct super_block *sb, @@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb, { return le16_to_cpu(bg->bg_itable_unused_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); } void ext4_block_bitmap_set(struct super_block *sb, @@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) journal = EXT4_SB(sb)->s_journal; if (journal) { if (is_journal_aborted(journal)) { - ext4_abort(sb, __func__, - "Detected aborted journal"); + ext4_abort(sb, __func__, "Detected aborted journal"); return ERR_PTR(-EROFS); } return jbd2_journal_start(journal, nblocks); @@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb) jbd2_journal_abort(journal, -EIO); } if (test_opt(sb, ERRORS_RO)) { - printk(KERN_CRIT "Remounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); sb->s_flags |= MS_RDONLY; } - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); if (test_opt(sb, ERRORS_PANIC)) panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); @@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function, { va_list args; - printk(KERN_CRIT "ext4_abort called.\n"); - va_start(args, fmt); printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); @@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function, if (sb->s_flags & MS_RDONLY) return; - printk(KERN_CRIT "Remounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; @@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } +void ext4_msg (struct super_block * sb, const char *prefix, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk("%sEXT4-fs (%s): ", prefix, sb->s_id); + vprintk(fmt, args); + printk("\n"); + va_end(args); +} + void ext4_warning(struct super_block *sb, const char *function, const char *fmt, ...) { @@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function, } void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, - const char *function, const char *fmt, ...) + const char *function, const char *fmt, ...) __releases(bitlock) __acquires(bitlock) { @@ -447,7 +461,7 @@ __acquires(bitlock) if (test_opt(sb, ERRORS_CONT)) { EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 0); + ext4_commit_super(sb, 0); return; } ext4_unlock_group(sb, grp); @@ -467,7 +481,6 @@ __acquires(bitlock) return; } - void ext4_update_dynamic_rev(struct super_block *sb) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) /* * Open the external journal device */ -static struct block_device *ext4_blkdev_get(dev_t dev) +static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; char b[BDEVNAME_SIZE]; @@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev) return bdev; fail: - printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", + ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; } @@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) { struct list_head *l; - printk(KERN_ERR "sb orphan head is %d\n", - le32_to_cpu(sbi->s_es->s_last_orphan)); + ext4_msg(sb, KERN_ERR, "sb orphan head is %d", + le32_to_cpu(sbi->s_es->s_last_orphan)); printk(KERN_ERR "sb_info orphan list:\n"); list_for_each(l, &sbi->s_orphan) { @@ -563,6 +576,7 @@ static void ext4_put_super(struct super_block *sb) struct ext4_super_block *es = sbi->s_es; int i, err; + ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); @@ -576,7 +590,7 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } if (sbi->s_proc) { remove_proc_entry(sb->s_id, ext4_proc_root); @@ -586,7 +600,10 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); - kfree(sbi->s_flex_groups); + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -629,7 +646,6 @@ static void ext4_put_super(struct super_block *sb) lock_kernel(); kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; } static struct kmem_cache *ext4_inode_cachep; @@ -644,6 +660,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; + #ifdef CONFIG_EXT4_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; @@ -664,14 +681,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_allocated_meta_blocks = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); + return &ei->vfs_inode; } static void ext4_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT4_I(inode)->i_orphan))) { - printk("EXT4 Inode %p: orphan list check failed!\n", - EXT4_I(inode)); + ext4_msg(inode->i_sb, KERN_ERR, + "Inode %lu (%p): orphan list check failed!", + inode->i_ino, EXT4_I(inode)); print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, EXT4_I(inode), sizeof(struct ext4_inode_info), true); @@ -870,12 +889,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noauto_da_alloc"); ext4_show_quota_options(seq, sb); + return 0; } - static struct inode *ext4_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) + u64 ino, u32 generation) { struct inode *inode; @@ -904,14 +923,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, } static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) + int fh_len, int fh_type) { return generic_fh_to_dentry(sb, fid, fh_len, fh_type, ext4_nfs_get_inode); } static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) + int fh_len, int fh_type) { return generic_fh_to_parent(sb, fid, fh_len, fh_type, ext4_nfs_get_inode); @@ -923,7 +942,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, * which would prevent try_to_free_buffers() from freeing them, we must use * jbd2 layer's try_to_free_buffers() function to release them. */ -static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) +static int bdev_try_to_free_page(struct super_block *sb, struct page *page, + gfp_t wait) { journal_t *journal = EXT4_SB(sb)->s_journal; @@ -992,7 +1012,6 @@ static const struct super_operations ext4_sops = { .dirty_inode = ext4_dirty_inode, .delete_inode = ext4_delete_inode, .put_super = ext4_put_super, - .write_super = ext4_write_super, .sync_fs = ext4_sync_fs, .freeze_fs = ext4_freeze, .unfreeze_fs = ext4_unfreeze, @@ -1007,6 +1026,25 @@ static const struct super_operations ext4_sops = { .bdev_try_to_free_page = bdev_try_to_free_page, }; +static const struct super_operations ext4_nojournal_sops = { + .alloc_inode = ext4_alloc_inode, + .destroy_inode = ext4_destroy_inode, + .write_inode = ext4_write_inode, + .dirty_inode = ext4_dirty_inode, + .delete_inode = ext4_delete_inode, + .write_super = ext4_write_super, + .put_super = ext4_put_super, + .statfs = ext4_statfs, + .remount_fs = ext4_remount, + .clear_inode = ext4_clear_inode, + .show_options = ext4_show_options, +#ifdef CONFIG_QUOTA + .quota_read = ext4_quota_read, + .quota_write = ext4_quota_write, +#endif + .bdev_try_to_free_page = bdev_try_to_free_page, +}; + static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, @@ -1023,12 +1061,13 @@ enum { Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, + Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, + Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio }; @@ -1069,6 +1108,7 @@ static const match_table_t tokens = { {Opt_data_writeback, "data=writeback"}, {Opt_data_err_abort, "data_err=abort"}, {Opt_data_err_ignore, "data_err=ignore"}, + {Opt_mb_history_length, "mb_history_length=%u"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, @@ -1087,6 +1127,8 @@ static const match_table_t tokens = { {Opt_resize, "resize"}, {Opt_delalloc, "delalloc"}, {Opt_nodelalloc, "nodelalloc"}, + {Opt_block_validity, "block_validity"}, + {Opt_noblock_validity, "noblock_validity"}, {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, {Opt_journal_ioprio, "journal_ioprio=%u"}, {Opt_auto_da_alloc, "auto_da_alloc=%u"}, @@ -1102,8 +1144,9 @@ static ext4_fsblk_t get_sb_block(void **data) if (!options || strncmp(options, "sb=", 3) != 0) return 1; /* Default location */ + options += 3; - /*todo: use simple_strtoll with >32bit ext4 */ + /* TODO: use simple_strtoll with >32bit ext4 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", @@ -1113,6 +1156,7 @@ static ext4_fsblk_t get_sb_block(void **data) if (*options == ',') options++; *data = (void *) options; + return sb_block; } @@ -1206,8 +1250,7 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_user_xattr: case Opt_nouser_xattr: - printk(KERN_ERR "EXT4 (no)user_xattr options " - "not supported\n"); + ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); break; #endif #ifdef CONFIG_EXT4_FS_POSIX_ACL @@ -1220,8 +1263,7 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_acl: case Opt_noacl: - printk(KERN_ERR "EXT4 (no)acl options " - "not supported\n"); + ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); break; #endif case Opt_journal_update: @@ -1231,16 +1273,16 @@ static int parse_options(char *options, struct super_block *sb, user to specify an existing inode to be the journal file. */ if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); return 0; } set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); break; case Opt_journal_dev: if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); return 0; } if (match_int(&args[0], &option)) @@ -1294,9 +1336,8 @@ static int parse_options(char *options, struct super_block *sb, if (is_remount) { if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) != data_opt) { - printk(KERN_ERR - "EXT4-fs: cannot change data " - "mode on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot change data mode on remount"); return 0; } } else { @@ -1310,6 +1351,13 @@ static int parse_options(char *options, struct super_block *sb, case Opt_data_err_ignore: clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); break; + case Opt_mb_history_length: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_mb_history_max = option; + break; #ifdef CONFIG_QUOTA case Opt_usrjquota: qtype = USRQUOTA; @@ -1319,31 +1367,31 @@ static int parse_options(char *options, struct super_block *sb, set_qf_name: if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { - printk(KERN_ERR - "EXT4-fs: Cannot change journaled " - "quota options when quota turned on.\n"); + ext4_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); return 0; } qname = match_strdup(&args[0]); if (!qname) { - printk(KERN_ERR - "EXT4-fs: not enough memory for " - "storing quotafile name.\n"); + ext4_msg(sb, KERN_ERR, + "Not enough memory for " + "storing quotafile name"); return 0; } if (sbi->s_qf_names[qtype] && strcmp(sbi->s_qf_names[qtype], qname)) { - printk(KERN_ERR - "EXT4-fs: %s quota file already " - "specified.\n", QTYPE2NAME(qtype)); + ext4_msg(sb, KERN_ERR, + "%s quota file already " + "specified", QTYPE2NAME(qtype)); kfree(qname); return 0; } sbi->s_qf_names[qtype] = qname; if (strchr(sbi->s_qf_names[qtype], '/')) { - printk(KERN_ERR - "EXT4-fs: quotafile must be on " - "filesystem root.\n"); + ext4_msg(sb, KERN_ERR, + "quotafile must be on " + "filesystem root"); kfree(sbi->s_qf_names[qtype]); sbi->s_qf_names[qtype] = NULL; return 0; @@ -1358,9 +1406,9 @@ set_qf_name: clear_qf_name: if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { - printk(KERN_ERR "EXT4-fs: Cannot change " + ext4_msg(sb, KERN_ERR, "Cannot change " "journaled quota options when " - "quota turned on.\n"); + "quota turned on"); return 0; } /* @@ -1377,9 +1425,9 @@ clear_qf_name: set_qf_format: if (sb_any_quota_loaded(sb) && sbi->s_jquota_fmt != qfmt) { - printk(KERN_ERR "EXT4-fs: Cannot change " + ext4_msg(sb, KERN_ERR, "Cannot change " "journaled quota options when " - "quota turned on.\n"); + "quota turned on"); return 0; } sbi->s_jquota_fmt = qfmt; @@ -1395,8 +1443,8 @@ set_qf_format: break; case Opt_noquota: if (sb_any_quota_loaded(sb)) { - printk(KERN_ERR "EXT4-fs: Cannot change quota " - "options when quota turned on.\n"); + ext4_msg(sb, KERN_ERR, "Cannot change quota " + "options when quota turned on"); return 0; } clear_opt(sbi->s_mount_opt, QUOTA); @@ -1407,8 +1455,8 @@ set_qf_format: case Opt_quota: case Opt_usrquota: case Opt_grpquota: - printk(KERN_ERR - "EXT4-fs: quota options not supported.\n"); + ext4_msg(sb, KERN_ERR, + "quota options not supported"); break; case Opt_usrjquota: case Opt_grpjquota: @@ -1416,9 +1464,8 @@ set_qf_format: case Opt_offgrpjquota: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: - printk(KERN_ERR - "EXT4-fs: journaled quota options not " - "supported.\n"); + ext4_msg(sb, KERN_ERR, + "journaled quota options not supported"); break; case Opt_noquota: break; @@ -1443,8 +1490,9 @@ set_qf_format: break; case Opt_resize: if (!is_remount) { - printk("EXT4-fs: resize option only available " - "for remount\n"); + ext4_msg(sb, KERN_ERR, + "resize option only available " + "for remount"); return 0; } if (match_int(&args[0], &option) != 0) @@ -1474,14 +1522,21 @@ set_qf_format: case Opt_delalloc: set_opt(sbi->s_mount_opt, DELALLOC); break; + case Opt_block_validity: + set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); + break; + case Opt_noblock_validity: + clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); + break; case Opt_inode_readahead_blks: if (match_int(&args[0], &option)) return 0; if (option < 0 || option > (1 << 30)) return 0; - if (option & (option - 1)) { - printk(KERN_ERR "EXT4-fs: inode_readahead_blks" - " must be a power of 2\n"); + if (!is_power_of_2(option)) { + ext4_msg(sb, KERN_ERR, + "EXT4-fs: inode_readahead_blks" + " must be a power of 2"); return 0; } sbi->s_inode_readahead_blks = option; @@ -1508,9 +1563,9 @@ set_qf_format: set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; default: - printk(KERN_ERR - "EXT4-fs: Unrecognized mount option \"%s\" " - "or missing value\n", p); + ext4_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" " + "or missing value", p); return 0; } } @@ -1528,21 +1583,21 @@ set_qf_format: (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || (sbi->s_qf_names[GRPQUOTA] && (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { - printk(KERN_ERR "EXT4-fs: old and new quota " - "format mixing.\n"); + ext4_msg(sb, KERN_ERR, "old and new quota " + "format mixing"); return 0; } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journaled quota format " - "not specified.\n"); + ext4_msg(sb, KERN_ERR, "journaled quota format " + "not specified"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journaled quota format " + ext4_msg(sb, KERN_ERR, "journaled quota format " "specified with no journaling " - "enabled.\n"); + "enabled"); return 0; } } @@ -1557,32 +1612,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int res = 0; if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { - printk(KERN_ERR "EXT4-fs warning: revision level too high, " - "forcing read-only mode\n"); + ext4_msg(sb, KERN_ERR, "revision level too high, " + "forcing read-only mode"); res = MS_RDONLY; } if (read_only) return res; if (!(sbi->s_mount_state & EXT4_VALID_FS)) - printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " + "running e2fsck is recommended"); else if ((sbi->s_mount_state & EXT4_ERROR_FS)) - printk(KERN_WARNING - "EXT4-fs warning: mounting fs with errors, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "warning: mounting fs with errors, " + "running e2fsck is recommended"); else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) - printk(KERN_WARNING - "EXT4-fs warning: maximal mount count reached, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "warning: maximal mount count reached, " + "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) - printk(KERN_WARNING - "EXT4-fs warning: checktime reached, " - "running e2fsck is recommended\n"); - if (!sbi->s_journal) + ext4_msg(sb, KERN_WARNING, + "warning: checktime reached, " + "running e2fsck is recommended"); + if (!sbi->s_journal) es->s_state &= cpu_to_le16(~EXT4_VALID_FS); if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); @@ -1592,7 +1647,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (sbi->s_journal) EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " "bpg=%lu, ipg=%lu, mo=%04lx]\n", @@ -1603,11 +1658,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, sbi->s_mount_opt); if (EXT4_SB(sb)->s_journal) { - printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", - sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : + ext4_msg(sb, KERN_INFO, "%s journal on %s", + EXT4_SB(sb)->s_journal->j_inode ? "internal" : "external", EXT4_SB(sb)->s_journal->j_devname); } else { - printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); + ext4_msg(sb, KERN_INFO, "no journal"); } return res; } @@ -1616,10 +1671,10 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; - struct buffer_head *bh; ext4_group_t flex_group_count; ext4_group_t flex_group; int groups_per_flex = 0; + size_t size; int i; if (!sbi->s_es->s_log_groups_per_flex) { @@ -1634,16 +1689,21 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; - sbi->s_flex_groups = kzalloc(flex_group_count * - sizeof(struct flex_groups), GFP_KERNEL); + size = flex_group_count * sizeof(struct flex_groups); + sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); + if (sbi->s_flex_groups == NULL) { + sbi->s_flex_groups = vmalloc(size); + if (sbi->s_flex_groups) + memset(sbi->s_flex_groups, 0, size); + } if (sbi->s_flex_groups == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory for " - "%u flex groups\n", flex_group_count); + ext4_msg(sb, KERN_ERR, "not enough memory for " + "%u flex groups", flex_group_count); goto failed; } for (i = 0; i < sbi->s_groups_count; i++) { - gdp = ext4_get_group_desc(sb, i, &bh); + gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, @@ -1724,44 +1784,44 @@ static int ext4_check_descriptors(struct super_block *sb) block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " - "(block %llu)!\n", i, block_bitmap); + "(block %llu)!", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " - "(block %llu)!\n", i, inode_bitmap); + "(block %llu)!", i, inode_bitmap); return 0; } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u not in group " - "(block %llu)!\n", i, inode_table); + "(block %llu)!", i, inode_table); return 0; } - spin_lock(sb_bgl_lock(sbi, i)); + ext4_lock_group(sb, i); if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Checksum for group %u failed (%u!=%u)\n", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, - gdp)), le16_to_cpu(gdp->bg_checksum)); + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Checksum for group %u failed (%u!=%u)", + i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), le16_to_cpu(gdp->bg_checksum)); if (!(sb->s_flags & MS_RDONLY)) { - spin_unlock(sb_bgl_lock(sbi, i)); + ext4_unlock_group(sb, i); return 0; } } - spin_unlock(sb_bgl_lock(sbi, i)); + ext4_unlock_group(sb, i); if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); + sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } @@ -1796,8 +1856,8 @@ static void ext4_orphan_cleanup(struct super_block *sb, } if (bdev_read_only(sb->s_bdev)) { - printk(KERN_ERR "EXT4-fs: write access " - "unavailable, skipping orphan cleanup.\n"); + ext4_msg(sb, KERN_ERR, "write access " + "unavailable, skipping orphan cleanup"); return; } @@ -1811,8 +1871,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } if (s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", - sb->s_id); + ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); sb->s_flags &= ~MS_RDONLY; } #ifdef CONFIG_QUOTA @@ -1823,9 +1882,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); if (ret < 0) - printk(KERN_ERR - "EXT4-fs: Cannot turn on journaled " - "quota: error %d\n", ret); + ext4_msg(sb, KERN_ERR, + "Cannot turn on journaled " + "quota: error %d", ret); } } #endif @@ -1842,16 +1901,16 @@ static void ext4_orphan_cleanup(struct super_block *sb, list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); vfs_dq_init(inode); if (inode->i_nlink) { - printk(KERN_DEBUG - "%s: truncating inode %lu to %lld bytes\n", + ext4_msg(sb, KERN_DEBUG, + "%s: truncating inode %lu to %lld bytes", __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); nr_truncates++; } else { - printk(KERN_DEBUG - "%s: deleting unreferenced inode %lu\n", + ext4_msg(sb, KERN_DEBUG, + "%s: deleting unreferenced inode %lu", __func__, inode->i_ino); jbd_debug(2, "deleting unreferenced inode %lu\n", inode->i_ino); @@ -1863,11 +1922,11 @@ static void ext4_orphan_cleanup(struct super_block *sb, #define PLURAL(x) (x), ((x) == 1) ? "" : "s" if (nr_orphans) - printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", - sb->s_id, PLURAL(nr_orphans)); + ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", + PLURAL(nr_orphans)); if (nr_truncates) - printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", - sb->s_id, PLURAL(nr_truncates)); + ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", + PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { @@ -1877,6 +1936,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ } + /* * Maximal extent format file size. * Resulting logical blkno at s_maxbytes must fit in our on-disk @@ -1927,19 +1987,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) loff_t res = EXT4_NDIR_BLOCKS; int meta_blocks; loff_t upper_limit; - /* This is calculated to be the largest file size for a - * dense, bitmapped file such that the total number of - * sectors in the file, including data and all indirect blocks, - * does not exceed 2^48 -1 - * __u32 i_blocks_lo and _u16 i_blocks_high representing the - * total number of 512 bytes blocks of the file + /* This is calculated to be the largest file size for a dense, block + * mapped file such that the file's total number of 512-byte sectors, + * including data and all indirect blocks, does not exceed (2^48 - 1). + * + * __u32 i_blocks_lo and _u16 i_blocks_high represent the total + * number of 512-byte sectors of the file. */ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * !has_huge_files or CONFIG_LBD is not enabled - * implies the inode i_block represent total blocks in - * 512 bytes 32 == size of vfs inode i_blocks * 8 + * !has_huge_files or CONFIG_LBD not enabled implies that + * the inode i_block field represents total file blocks in + * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 */ upper_limit = (1LL << 32) - 1; @@ -1981,7 +2041,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) } static ext4_fsblk_t descriptor_loc(struct super_block *sb, - ext4_fsblk_t logical_sb_block, int nr) + ext4_fsblk_t logical_sb_block, int nr) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t bg, first_meta_bg; @@ -1995,6 +2055,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, bg = sbi->s_desc_per_block * nr; if (ext4_bg_has_super(sb, bg)) has_super = 1; + return (has_super + ext4_group_first_block_no(sb, bg)); } @@ -2091,8 +2152,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, if (parse_strtoul(buf, 0x40000000, &t)) return -EINVAL; - /* inode_readahead_blks must be a power of 2 */ - if (t & (t-1)) + if (!is_power_of_2(t)) return -EINVAL; sbi->s_inode_readahead_blks = t; @@ -2100,7 +2160,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, } static ssize_t sbi_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) + struct ext4_sb_info *sbi, char *buf) { unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); @@ -2205,7 +2265,6 @@ static struct kobj_type ext4_ktype = { static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) - { struct buffer_head *bh; struct ext4_super_block *es = NULL; @@ -2256,7 +2315,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) { - printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); + ext4_msg(sb, KERN_ERR, "unable to set blocksize"); goto out_fail; } @@ -2272,7 +2331,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (!(bh = sb_bread(sb, logical_sb_block))) { - printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); + ext4_msg(sb, KERN_ERR, "unable to read superblock"); goto out_fail; } /* @@ -2321,6 +2380,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; + sbi->s_mb_history_max = default_mb_history_length; set_opt(sbi->s_mount_opt, BARRIER); @@ -2330,7 +2390,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ set_opt(sbi->s_mount_opt, DELALLOC); - if (!parse_options((char *) data, sb, &journal_devnum, &journal_ioprio, NULL, 0)) goto failed_mount; @@ -2342,9 +2401,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) - printk(KERN_WARNING - "EXT4-fs warning: feature flags set on rev 0 fs, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "feature flags set on rev 0 fs, " + "running e2fsck is recommended"); /* * Check feature flags regardless of the revision level, since we @@ -2353,16 +2412,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); if (features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " - "unsupported optional features (%x).\n", sb->s_id, + ext4_msg(sb, KERN_ERR, + "Couldn't mount because of " + "unsupported optional features (%x)", (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & ~EXT4_FEATURE_INCOMPAT_SUPP)); goto failed_mount; } features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); if (!(sb->s_flags & MS_RDONLY) && features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " - "unsupported optional features (%x).\n", sb->s_id, + ext4_msg(sb, KERN_ERR, + "Couldn't mount RDWR because of " + "unsupported optional features (%x)", (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & ~EXT4_FEATURE_RO_COMPAT_SUPP)); goto failed_mount; @@ -2376,9 +2437,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ if (sizeof(root->i_blocks) < sizeof(u64) && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " + ext4_msg(sb, KERN_ERR, "Filesystem with huge " "files cannot be mounted read-write " - "without CONFIG_LBD.\n", sb->s_id); + "without CONFIG_LBD"); goto failed_mount; } } @@ -2386,17 +2447,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { - printk(KERN_ERR - "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", - blocksize, sb->s_id); + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d", blocksize); goto failed_mount; } if (sb->s_blocksize != blocksize) { - /* Validate the filesystem blocksize */ if (!sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "EXT4-fs: bad block size %d.\n", + ext4_msg(sb, KERN_ERR, "bad block size %d", blocksize); goto failed_mount; } @@ -2406,15 +2465,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) offset = do_div(logical_sb_block, blocksize); bh = sb_bread(sb, logical_sb_block); if (!bh) { - printk(KERN_ERR - "EXT4-fs: Can't read superblock on 2nd try.\n"); + ext4_msg(sb, KERN_ERR, + "Can't read superblock on 2nd try"); goto failed_mount; } es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - printk(KERN_ERR - "EXT4-fs: Magic mismatch, very weird !\n"); + ext4_msg(sb, KERN_ERR, + "Magic mismatch, very weird!"); goto failed_mount; } } @@ -2432,30 +2491,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { - printk(KERN_ERR - "EXT4-fs: unsupported inode size: %d\n", + ext4_msg(sb, KERN_ERR, + "unsupported inode size: %d", sbi->s_inode_size); goto failed_mount; } if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } + sbi->s_desc_size = le16_to_cpu(es->s_desc_size); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || sbi->s_desc_size > EXT4_MAX_DESC_SIZE || !is_power_of_2(sbi->s_desc_size)) { - printk(KERN_ERR - "EXT4-fs: unsupported descriptor size %lu\n", + ext4_msg(sb, KERN_ERR, + "unsupported descriptor size %lu", sbi->s_desc_size); goto failed_mount; } } else sbi->s_desc_size = EXT4_MIN_DESC_SIZE; + sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) goto cantfind_ext4; + sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) goto cantfind_ext4; @@ -2466,6 +2528,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); + for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; @@ -2483,25 +2546,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (sbi->s_blocks_per_group > blocksize * 8) { - printk(KERN_ERR - "EXT4-fs: #blocks per group too big: %lu\n", + ext4_msg(sb, KERN_ERR, + "#blocks per group too big: %lu", sbi->s_blocks_per_group); goto failed_mount; } if (sbi->s_inodes_per_group > blocksize * 8) { - printk(KERN_ERR - "EXT4-fs: #inodes per group too big: %lu\n", + ext4_msg(sb, KERN_ERR, + "#inodes per group too big: %lu", sbi->s_inodes_per_group); goto failed_mount; } if (ext4_blocks_count(es) > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to mount safely\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "filesystem" + " too large to mount safely"); if (sizeof(sector_t) < 8) - printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " - "enabled\n"); + ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); goto failed_mount; } @@ -2511,21 +2573,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* check blocks count against device size */ blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (blocks_count && ext4_blocks_count(es) > blocks_count) { - printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " - "exceeds size of device (%llu blocks)\n", + ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " + "exceeds size of device (%llu blocks)", ext4_blocks_count(es), blocks_count); goto failed_mount; } - /* - * It makes no sense for the first data block to be beyond the end - * of the filesystem. - */ - if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { - printk(KERN_WARNING "EXT4-fs: bad geometry: first data" - "block %u is beyond end of filesystem (%llu)\n", - le32_to_cpu(es->s_first_data_block), - ext4_blocks_count(es)); + /* + * It makes no sense for the first data block to be beyond the end + * of the filesystem. + */ + if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data" + "block %u is beyond end of filesystem (%llu)", + le32_to_cpu(es->s_first_data_block), + ext4_blocks_count(es)); goto failed_mount; } blocks_count = (ext4_blocks_count(es) - @@ -2533,9 +2595,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_BLOCKS_PER_GROUP(sb) - 1); do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - printk(KERN_WARNING "EXT4-fs: groups count too large: %u " + ext4_msg(sb, KERN_WARNING, "groups count too large: %u " "(block count %llu, first data block %u, " - "blocks per group %lu)\n", sbi->s_groups_count, + "blocks per group %lu)", sbi->s_groups_count, ext4_blocks_count(es), le32_to_cpu(es->s_first_data_block), EXT4_BLOCKS_PER_GROUP(sb)); @@ -2547,7 +2609,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory\n"); + ext4_msg(sb, KERN_ERR, "not enough memory"); goto failed_mount; } @@ -2562,21 +2624,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) block = descriptor_loc(sb, logical_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); if (!sbi->s_group_desc[i]) { - printk(KERN_ERR "EXT4-fs: " - "can't read group descriptor %d\n", i); + ext4_msg(sb, KERN_ERR, + "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } } if (!ext4_check_descriptors(sb)) { - printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); + ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); goto failed_mount2; } if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) if (!ext4_fill_flex_info(sb)) { - printk(KERN_ERR - "EXT4-fs: unable to initialize " - "flex_bg meta info!\n"); + ext4_msg(sb, KERN_ERR, + "unable to initialize " + "flex_bg meta info!"); goto failed_mount2; } @@ -2598,7 +2660,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); } if (err) { - printk(KERN_ERR "EXT4-fs: insufficient memory\n"); + ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; } @@ -2607,7 +2669,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* * set up enough so that it can read an inode */ - sb->s_op = &ext4_sops; + if (!test_opt(sb, NOLOAD) && + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + sb->s_op = &ext4_sops; + else + sb->s_op = &ext4_nojournal_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA @@ -2615,6 +2681,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->dq_op = &ext4_quota_operations; #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + mutex_init(&sbi->s_orphan_lock); + mutex_init(&sbi->s_resize_lock); sb->s_root = NULL; @@ -2632,13 +2700,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount3; if (!(sb->s_flags & MS_RDONLY) && EXT4_SB(sb)->s_journal->j_failed_commit) { - printk(KERN_CRIT "EXT4-fs error (device %s): " + ext4_msg(sb, KERN_CRIT, "error: " "ext4_fill_super: Journal transaction " - "%u is corrupt\n", sb->s_id, + "%u is corrupt", EXT4_SB(sb)->s_journal->j_failed_commit); if (test_opt(sb, ERRORS_RO)) { - printk(KERN_CRIT - "Mounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, + "Mounting filesystem read-only"); sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); @@ -2646,14 +2714,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (test_opt(sb, ERRORS_PANIC)) { EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); goto failed_mount4; } } } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { - printk(KERN_ERR "EXT4-fs: required journal recovery " - "suppressed and not mounted read-only\n"); + ext4_msg(sb, KERN_ERR, "required journal recovery " + "suppressed and not mounted read-only"); goto failed_mount4; } else { clear_opt(sbi->s_mount_opt, DATA_FLAGS); @@ -2666,7 +2734,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (ext4_blocks_count(es) > 0xffffffffULL && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { - printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); + ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); goto failed_mount4; } @@ -2704,8 +2772,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) case EXT4_MOUNT_WRITEBACK_DATA: if (!jbd2_journal_check_available_features (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - printk(KERN_ERR "EXT4-fs: Journal does not support " - "requested data journaling mode\n"); + ext4_msg(sb, KERN_ERR, "Journal does not support " + "requested data journaling mode"); goto failed_mount4; } default: @@ -2717,8 +2785,8 @@ no_journal: if (test_opt(sb, NOBH)) { if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { - printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " - "its supported only with writeback mode\n"); + ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " + "its supported only with writeback mode"); clear_opt(sbi->s_mount_opt, NOBH); } } @@ -2729,18 +2797,18 @@ no_journal: root = ext4_iget(sb, EXT4_ROOT_INO); if (IS_ERR(root)) { - printk(KERN_ERR "EXT4-fs: get root inode failed\n"); + ext4_msg(sb, KERN_ERR, "get root inode failed"); ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { iput(root); - printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); + ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); goto failed_mount4; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { - printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); + ext4_msg(sb, KERN_ERR, "get root dentry failed"); iput(root); ret = -ENOMEM; goto failed_mount4; @@ -2769,22 +2837,29 @@ no_journal: sbi->s_inode_size) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; - printk(KERN_INFO "EXT4-fs: required extra inode space not" - "available.\n"); + ext4_msg(sb, KERN_INFO, "required extra inode space not" + "available"); } if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { - printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " - "requested data journaling mode\n"); + ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " + "requested data journaling mode"); clear_opt(sbi->s_mount_opt, DELALLOC); } else if (test_opt(sb, DELALLOC)) - printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); + ext4_msg(sb, KERN_INFO, "delayed allocation enabled"); + + err = ext4_setup_system_zone(sb); + if (err) { + ext4_msg(sb, KERN_ERR, "failed to initialize system " + "zone (%d)\n", err); + goto failed_mount4; + } ext4_ext_init(sb); err = ext4_mb_init(sb, needs_recovery); if (err) { - printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", - err); + ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", + err); goto failed_mount4; } @@ -2798,19 +2873,11 @@ no_journal: goto failed_mount4; }; - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock - * in numerous places. Here we just pop the lock - it's relatively - * harmless, because we are now ready to accept write_super() requests, - * and aviro says that's the only reason for hanging onto the - * superblock lock. - */ EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) { - printk(KERN_INFO "EXT4-fs: recovery complete.\n"); + ext4_msg(sb, KERN_INFO, "recovery complete"); ext4_mark_recovery_complete(sb, es); } if (EXT4_SB(sb)->s_journal) { @@ -2823,25 +2890,30 @@ no_journal: } else descr = "out journal"; - printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", - sb->s_id, descr); + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); lock_kernel(); return 0; cantfind_ext4: if (!silent) - printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", - sb->s_id); + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; failed_mount4: - printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "mount failed"); + ext4_release_system_zone(sb); if (sbi->s_journal) { jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } failed_mount3: + if (sbi->s_flex_groups) { + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); + } percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -2862,6 +2934,7 @@ failed_mount: brelse(bh); out_fail: sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); lock_kernel(); return ret; @@ -2906,27 +2979,27 @@ static journal_t *ext4_get_journal(struct super_block *sb, journal_inode = ext4_iget(sb, journal_inum); if (IS_ERR(journal_inode)) { - printk(KERN_ERR "EXT4-fs: no journal found.\n"); + ext4_msg(sb, KERN_ERR, "no journal found"); return NULL; } if (!journal_inode->i_nlink) { make_bad_inode(journal_inode); iput(journal_inode); - printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); + ext4_msg(sb, KERN_ERR, "journal inode is deleted"); return NULL; } jbd_debug(2, "Journal inode found at %p: %lld bytes\n", journal_inode, journal_inode->i_size); if (!S_ISREG(journal_inode->i_mode)) { - printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); + ext4_msg(sb, KERN_ERR, "invalid journal inode"); iput(journal_inode); return NULL; } journal = jbd2_journal_init_inode(journal_inode); if (!journal) { - printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); + ext4_msg(sb, KERN_ERR, "Could not load journal inode"); iput(journal_inode); return NULL; } @@ -2950,13 +3023,13 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); - bdev = ext4_blkdev_get(j_dev); + bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) return NULL; if (bd_claim(bdev, sb)) { - printk(KERN_ERR - "EXT4-fs: failed to claim external journal device.\n"); + ext4_msg(sb, KERN_ERR, + "failed to claim external journal device"); blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } @@ -2964,8 +3037,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, blocksize = sb->s_blocksize; hblock = bdev_hardsect_size(bdev); if (blocksize < hblock) { - printk(KERN_ERR - "EXT4-fs: blocksize too small for journal device.\n"); + ext4_msg(sb, KERN_ERR, + "blocksize too small for journal device"); goto out_bdev; } @@ -2973,8 +3046,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, offset = EXT4_MIN_BLOCK_SIZE % blocksize; set_blocksize(bdev, blocksize); if (!(bh = __bread(bdev, sb_block, blocksize))) { - printk(KERN_ERR "EXT4-fs: couldn't read superblock of " - "external journal\n"); + ext4_msg(sb, KERN_ERR, "couldn't read superblock of " + "external journal"); goto out_bdev; } @@ -2982,14 +3055,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || !(le32_to_cpu(es->s_feature_incompat) & EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { - printk(KERN_ERR "EXT4-fs: external journal has " - "bad superblock\n"); + ext4_msg(sb, KERN_ERR, "external journal has " + "bad superblock"); brelse(bh); goto out_bdev; } if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { - printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); + ext4_msg(sb, KERN_ERR, "journal UUID does not match"); brelse(bh); goto out_bdev; } @@ -3001,25 +3074,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, journal = jbd2_journal_init_dev(bdev, sb->s_bdev, start, len, blocksize); if (!journal) { - printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); + ext4_msg(sb, KERN_ERR, "failed to create device journal"); goto out_bdev; } journal->j_private = sb; ll_rw_block(READ, 1, &journal->j_sb_buffer); wait_on_buffer(journal->j_sb_buffer); if (!buffer_uptodate(journal->j_sb_buffer)) { - printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); + ext4_msg(sb, KERN_ERR, "I/O error on journal device"); goto out_journal; } if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT4-fs: External journal has more than one " - "user (unsupported) - %d\n", + ext4_msg(sb, KERN_ERR, "External journal has more than one " + "user (unsupported) - %d", be32_to_cpu(journal->j_superblock->s_nr_users)); goto out_journal; } EXT4_SB(sb)->journal_bdev = bdev; ext4_init_journal_params(sb, journal); return journal; + out_journal: jbd2_journal_destroy(journal); out_bdev: @@ -3041,8 +3115,8 @@ static int ext4_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { - printk(KERN_INFO "EXT4-fs: external journal device major/minor " - "numbers have changed\n"); + ext4_msg(sb, KERN_INFO, "external journal device major/minor " + "numbers have changed"); journal_dev = new_decode_dev(journal_devnum); } else journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); @@ -3054,24 +3128,23 @@ static int ext4_load_journal(struct super_block *sb, * crash? For recovery, we need to check in advance whether we * can get read-write access to the device. */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { if (sb->s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: INFO: recovery " - "required on readonly filesystem.\n"); + ext4_msg(sb, KERN_INFO, "INFO: recovery " + "required on readonly filesystem"); if (really_read_only) { - printk(KERN_ERR "EXT4-fs: write access " - "unavailable, cannot proceed.\n"); + ext4_msg(sb, KERN_ERR, "write access " + "unavailable, cannot proceed"); return -EROFS; } - printk(KERN_INFO "EXT4-fs: write access will " - "be enabled during recovery.\n"); + ext4_msg(sb, KERN_INFO, "write access will " + "be enabled during recovery"); } } if (journal_inum && journal_dev) { - printk(KERN_ERR "EXT4-fs: filesystem has both journal " - "and inode journals!\n"); + ext4_msg(sb, KERN_ERR, "filesystem has both journal " + "and inode journals!"); return -EINVAL; } @@ -3084,14 +3157,14 @@ static int ext4_load_journal(struct super_block *sb, } if (journal->j_flags & JBD2_BARRIER) - printk(KERN_INFO "EXT4-fs: barriers enabled\n"); + ext4_msg(sb, KERN_INFO, "barriers enabled"); else - printk(KERN_INFO "EXT4-fs: barriers disabled\n"); + ext4_msg(sb, KERN_INFO, "barriers disabled"); if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = jbd2_journal_update_format(journal); if (err) { - printk(KERN_ERR "EXT4-fs: error updating journal.\n"); + ext4_msg(sb, KERN_ERR, "error updating journal"); jbd2_journal_destroy(journal); return err; } @@ -3103,7 +3176,7 @@ static int ext4_load_journal(struct super_block *sb, err = jbd2_journal_load(journal); if (err) { - printk(KERN_ERR "EXT4-fs: error loading journal.\n"); + ext4_msg(sb, KERN_ERR, "error loading journal"); jbd2_journal_destroy(journal); return err; } @@ -3114,18 +3187,17 @@ static int ext4_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { es->s_journal_dev = cpu_to_le32(journal_devnum); - sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } return 0; } -static int ext4_commit_super(struct super_block *sb, - struct ext4_super_block *es, int sync) +static int ext4_commit_super(struct super_block *sb, int sync) { + struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; @@ -3140,8 +3212,8 @@ static int ext4_commit_super(struct super_block *sb, * be remapped. Nothing we can do but to retry the * write and hope for the best. */ - printk(KERN_ERR "EXT4-fs: previous I/O error to " - "superblock detected for %s.\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "previous I/O error to " + "superblock detected"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } @@ -3154,7 +3226,7 @@ static int ext4_commit_super(struct super_block *sb, &EXT4_SB(sb)->s_freeblocks_counter)); es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); - + sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); if (sync) { @@ -3164,8 +3236,8 @@ static int ext4_commit_super(struct super_block *sb, error = buffer_write_io_error(sbh); if (error) { - printk(KERN_ERR "EXT4-fs: I/O error while writing " - "superblock for %s.\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "I/O error while writing " + "superblock"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } @@ -3173,7 +3245,6 @@ static int ext4_commit_super(struct super_block *sb, return error; } - /* * Have we just finished recovery? If so, and if we are mounting (or * remounting) the filesystem readonly, then we will end up with a @@ -3192,14 +3263,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb, if (jbd2_journal_flush(journal) < 0) goto out; - lock_super(sb); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - sb->s_dirt = 0; - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } - unlock_super(sb); out: jbd2_journal_unlock_updates(journal); @@ -3238,7 +3306,7 @@ static void ext4_clear_journal_err(struct super_block *sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); jbd2_journal_clear_err(journal); } @@ -3257,29 +3325,15 @@ int ext4_force_commit(struct super_block *sb) return 0; journal = EXT4_SB(sb)->s_journal; - if (journal) { - sb->s_dirt = 0; + if (journal) ret = ext4_journal_force_commit(journal); - } return ret; } -/* - * Ext4 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this - * point. (We can probably nuke this function altogether, and remove - * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) - */ static void ext4_write_super(struct super_block *sb) { - if (EXT4_SB(sb)->s_journal) { - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; - } else { - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - } + ext4_commit_super(sb, 1); } static int ext4_sync_fs(struct super_block *sb, int wait) @@ -3288,16 +3342,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait) tid_t target; trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); - sb->s_dirt = 0; - if (EXT4_SB(sb)->s_journal) { - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, - &target)) { - if (wait) - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, - target); - } - } else { - ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); + if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { + if (wait) + jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); } return ret; } @@ -3310,34 +3357,32 @@ static int ext4_freeze(struct super_block *sb) { int error = 0; journal_t *journal; - sb->s_dirt = 0; - if (!(sb->s_flags & MS_RDONLY)) { - journal = EXT4_SB(sb)->s_journal; + if (sb->s_flags & MS_RDONLY) + return 0; - if (journal) { - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); + journal = EXT4_SB(sb)->s_journal; - /* - * We don't want to clear needs_recovery flag when we - * failed to flush the journal. - */ - error = jbd2_journal_flush(journal); - if (error < 0) - goto out; - } + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); - /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - if (error) - goto out; + /* + * Don't clear the needs_recovery flag if we failed to flush + * the journal. + */ + error = jbd2_journal_flush(journal); + if (error < 0) { + out: + jbd2_journal_unlock_updates(journal); + return error; } + + /* Journal blocked and flushed, clear needs_recovery flag. */ + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + error = ext4_commit_super(sb, 1); + if (error) + goto out; return 0; -out: - jbd2_journal_unlock_updates(journal); - return error; } /* @@ -3346,14 +3391,15 @@ out: */ static int ext4_unfreeze(struct super_block *sb) { - if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { - lock_super(sb); - /* Reser the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - unlock_super(sb); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - } + if (sb->s_flags & MS_RDONLY) + return 0; + + lock_super(sb); + /* Reset the needs_recovery flag before the fs is unlocked. */ + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_commit_super(sb, 1); + unlock_super(sb); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return 0; } @@ -3432,22 +3478,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) (sbi->s_mount_state & EXT4_VALID_FS)) es->s_state = cpu_to_le16(sbi->s_mount_state); - /* - * We have to unlock super so that we can wait for - * transactions. - */ - if (sbi->s_journal) { - unlock_super(sb); + if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); - lock_super(sb); - } } else { int ret; if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP))) { - printk(KERN_WARNING "EXT4-fs: %s: couldn't " + ext4_msg(sb, KERN_WARNING, "couldn't " "remount RDWR because of unsupported " - "optional features (%x).\n", sb->s_id, + "optional features (%x)", (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & ~EXT4_FEATURE_RO_COMPAT_SUPP)); err = -EROFS; @@ -3456,17 +3495,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) /* * Make sure the group descriptor checksums - * are sane. If they aren't, refuse to - * remount r/w. + * are sane. If they aren't, refuse to remount r/w. */ for (g = 0; g < sbi->s_groups_count; g++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, g, NULL); if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { - printk(KERN_ERR - "EXT4-fs: ext4_remount: " - "Checksum for group %u failed (%u!=%u)\n", + ext4_msg(sb, KERN_ERR, + "ext4_remount: Checksum for group %u failed (%u!=%u)", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), le16_to_cpu(gdp->bg_checksum)); err = -EINVAL; @@ -3480,11 +3517,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * require a full umount/remount for now. */ if (es->s_last_orphan) { - printk(KERN_WARNING "EXT4-fs: %s: couldn't " + ext4_msg(sb, KERN_WARNING, "Couldn't " "remount RDWR because of unprocessed " "orphan inode list. Please " - "umount/remount instead.\n", - sb->s_id); + "umount/remount instead"); err = -EINVAL; goto restore_opts; } @@ -3504,8 +3540,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; } } + ext4_setup_system_zone(sb); if (sbi->s_journal == NULL) - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA /* Release old quota file names */ @@ -3515,6 +3552,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) kfree(old_opts.s_qf_names[i]); #endif return 0; + restore_opts: sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.s_mount_opt; @@ -3545,9 +3583,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) if (test_opt(sb, MINIX_DF)) { sbi->s_overhead_last = 0; } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { - ext4_group_t ngroups = sbi->s_groups_count, i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); ext4_fsblk_t overhead = 0; - smp_rmb(); /* * Compute the overhead (FS structures). This is constant @@ -3599,11 +3636,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) le64_to_cpup((void *)es->s_uuid + sizeof(u64)); buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; + return 0; } -/* Helper function for writing quotas on sync - we need to start transaction before quota file - * is locked for write. Otherwise the are possible deadlocks: +/* Helper function for writing quotas on sync - we need to start transaction + * before quota file is locked for write. Otherwise the are possible deadlocks: * Process 1 Process 2 * ext4_create() quota_sync() * jbd2_journal_start() write_dquot() @@ -3627,7 +3665,7 @@ static int ext4_write_dquot(struct dquot *dquot) inode = dquot_to_inode(dquot); handle = ext4_journal_start(inode, - EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); @@ -3643,7 +3681,7 @@ static int ext4_acquire_dquot(struct dquot *dquot) handle_t *handle; handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); @@ -3659,7 +3697,7 @@ static int ext4_release_dquot(struct dquot *dquot) handle_t *handle; handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) { /* Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); @@ -3707,7 +3745,7 @@ static int ext4_write_info(struct super_block *sb, int type) static int ext4_quota_on_mount(struct super_block *sb, int type) { return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], - EXT4_SB(sb)->s_jquota_fmt, type); + EXT4_SB(sb)->s_jquota_fmt, type); } /* @@ -3738,9 +3776,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ if (path.dentry->d_parent != sb->s_root) - printk(KERN_WARNING - "EXT4-fs: Quota file not on filesystem root. " - "Journaled quota will not work.\n"); + ext4_msg(sb, KERN_WARNING, + "Quota file not on filesystem root. " + "Journaled quota will not work"); } /* @@ -3823,8 +3861,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, handle_t *handle = journal_current_handle(); if (EXT4_SB(sb)->s_journal && !handle) { - printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" - " cancelled because transaction is not started.\n", + ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" + " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); return -EIO; } @@ -3878,10 +3916,10 @@ out: #endif -static int ext4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static int ext4_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } static struct file_system_type ext4_fs_type = { @@ -3893,14 +3931,14 @@ static struct file_system_type ext4_fs_type = { }; #ifdef CONFIG_EXT4DEV_COMPAT -static int ext4dev_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static int ext4dev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data,struct vfsmount *mnt) { - printk(KERN_WARNING "EXT4-fs: Update your userspace programs " - "to mount using ext4\n"); - printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " - "will go away by 2.6.31\n"); - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); + printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs " + "to mount using ext4\n", dev_name); + printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility " + "will go away by 2.6.31\n", dev_name); + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } static struct file_system_type ext4dev_fs_type = { @@ -3917,13 +3955,16 @@ static int __init init_ext4_fs(void) { int err; + err = init_ext4_system_zone(); + if (err) + return err; ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); if (!ext4_kset) - return -ENOMEM; + goto out4; ext4_proc_root = proc_mkdir("fs/ext4", NULL); err = init_ext4_mballoc(); if (err) - return err; + goto out3; err = init_ext4_xattr(); if (err) @@ -3948,6 +3989,11 @@ out1: exit_ext4_xattr(); out2: exit_ext4_mballoc(); +out3: + remove_proc_entry("fs/ext4", NULL); + kset_unregister(ext4_kset); +out4: + exit_ext4_system_zone(); return err; } @@ -3962,6 +4008,7 @@ static void __exit exit_ext4_fs(void) exit_ext4_mballoc(); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); + exit_ext4_system_zone(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); diff --git a/fs/ioctl.c b/fs/ioctl.c index 82d9c42b8bac..286f38dfc6c0 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd, switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); - case FS_IOC_FIEMAP: - return ioctl_fiemap(filp, arg); - case FIGETBSZ: - return put_user(inode->i_sb->s_blocksize, p); case FIONREAD: return put_user(i_size_read(inode) - filp->f_pos, p); } @@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, error = ioctl_fsthaw(filp); break; + case FS_IOC_FIEMAP: + return ioctl_fiemap(filp, arg); + + case FIGETBSZ: + { + struct inode *inode = filp->f_path.dentry->d_inode; + int __user *p = (int __user *)arg; + return put_user(inode->i_sb->s_blocksize, p); + } + default: if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 58144102bf25..62be7d294ec2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) * Journal abort has very specific semantics, which we describe * for journal abort. * - * Two internal function, which provide abort to te jbd layer + * Two internal functions, which provide abort to the jbd layer * itself are here. */ @@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) * int jbd2_journal_errno () - returns the journal's error state. * @journal: journal to examine. * - * This is the errno numbet set with jbd2_journal_abort(), the last + * This is the errno number set with jbd2_journal_abort(), the last * time the journal was mounted - if the journal was stopped * without calling abort this will be 0. * @@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal) * int jbd2_journal_clear_err () - clears the journal's error state * @journal: journal to act on. * - * An error must be cleared or Acked to take a FS out of readonly + * An error must be cleared or acked to take a FS out of readonly * mode. */ int jbd2_journal_clear_err(journal_t *journal) @@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal) * void jbd2_journal_ack_err() - Ack journal err. * @journal: journal to act on. * - * An error must be cleared or Acked to take a FS out of readonly + * An error must be cleared or acked to take a FS out of readonly * mode. */ void jbd2_journal_ack_err(journal_t *journal) diff --git a/fs/mpage.c b/fs/mpage.c index 680ba60863ff..42381bd6543b 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, struct buffer_head map_bh; unsigned long first_logical_block = 0; - clear_buffer_mapped(&map_bh); + map_bh.b_state = 0; + map_bh.b_size = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); @@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block) struct buffer_head map_bh; unsigned long first_logical_block = 0; - clear_buffer_mapped(&map_bh); + map_bh.b_state = 0; + map_bh.b_size = 0; bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, &map_bh, &first_logical_block, get_block); if (bio) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 89853bcd27a6..f1736ca7922c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -63,7 +63,7 @@ #define BRANCH_PROFILE() #endif -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d960889e92ef..7e4350ece0f8 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,9 +116,9 @@ struct blk_io_trace { * The remap event */ struct blk_io_trace_remap { - __be32 device; __be32 device_from; - __be64 sector; + __be32 device_to; + __be64 sector_from; }; enum { @@ -165,8 +165,9 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, - char *name, dev_t dev, struct blk_user_trace_setup *buts); +extern int do_blk_trace_setup(struct request_queue *q, char *name, + dev_t dev, struct block_device *bdev, + struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** @@ -193,22 +194,42 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); extern void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ -#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) -#define blk_trace_shutdown(q) do { } while (0) -#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) -#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) -#define blk_trace_startstop(q, start) (-ENOTTY) -#define blk_trace_remove(q) (-ENOTTY) -#define blk_add_trace_msg(q, fmt, ...) do { } while (0) +# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +# define blk_trace_shutdown(q) do { } while (0) +# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) +# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) +# define blk_trace_startstop(q, start) (-ENOTTY) +# define blk_trace_remove(q) (-ENOTTY) +# define blk_add_trace_msg(q, fmt, ...) do { } while (0) +static inline int blk_trace_init_sysfs(struct device *dev) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) + +static inline int blk_cmd_buf_len(struct request *rq) +{ + return blk_pc_request(rq) ? rq->cmd_len * 3 : 1; +} + +extern void blk_dump_cmd(char *buf, struct request *rq); +extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); +extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); + +#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21f9a3c..af931ee43dd8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 788850ba4e75..1fbdea4f08eb 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL { #ifndef DP_WINDOW_SIZE -/* #include "cyclomz.h" */ -/****************** ****************** *******************/ -/* - * The data types defined below are used in all ZFIRM interface - * data structures. They accomodate differences between HW - * architectures and compilers. - */ - -typedef __u64 ucdouble; /* 64 bits, unsigned */ -typedef __u32 uclong; /* 32 bits, unsigned */ -typedef __u16 ucshort; /* 16 bits, unsigned */ -typedef __u8 ucchar; /* 8 bits, unsigned */ - /* * Memory Window Sizes */ @@ -507,16 +494,20 @@ struct ZFW_CTRL { /* Per card data structure */ struct cyclades_card { - void __iomem *base_addr; - void __iomem *ctl_addr; - int irq; - unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ - unsigned int first_line; /* minor number of first channel on card */ - unsigned int nports; /* Number of ports in the card */ - int bus_index; /* address shift - 0 for ISA, 1 for PCI */ - int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ - spinlock_t card_lock; - struct cyclades_port *ports; + void __iomem *base_addr; + union { + void __iomem *p9050; + struct RUNTIME_9060 __iomem *p9060; + } ctl_addr; + int irq; + unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ + unsigned int first_line; /* minor number of first channel on card */ + unsigned int nports; /* Number of ports in the card */ + int bus_index; /* address shift - 0 for ISA, 1 for PCI */ + int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ + u32 hw_ver; + spinlock_t card_lock; + struct cyclades_port *ports; }; /*************************************** diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8a0c2f221e6b..39b95c56587e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); -extern void ftrace_release(void *start, unsigned long size); - extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } -static inline void -ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) { } #endif /* @@ -368,6 +361,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -379,8 +373,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function @@ -496,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) extern int ftrace_dump_on_oops; +#ifdef CONFIG_PREEMPT +#define INIT_TRACE_RECURSION .trace_recursion = 0, +#endif + #endif /* CONFIG_TRACING */ +#ifndef INIT_TRACE_RECURSION +#define INIT_TRACE_RECURSION +#endif #ifdef CONFIG_HW_BRANCH_TRACER diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h new file mode 100644 index 000000000000..5c093ffc655b --- /dev/null +++ b/include/linux/ftrace_event.h @@ -0,0 +1,172 @@ +#ifndef _LINUX_FTRACE_EVENT_H +#define _LINUX_FTRACE_EVENT_H + +#include <linux/trace_seq.h> +#include <linux/ring_buffer.h> +#include <linux/percpu.h> + +struct trace_array; +struct tracer; +struct dentry; + +DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq); + +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array); + +const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array); + +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + unsigned short type; + unsigned char flags; + unsigned char preempt_count; + int pid; + int tgid; +}; + +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + void *private; + int cpu_file; + struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; + unsigned long iter_flags; + + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; + struct trace_entry *ent; + int cpu; + u64 ts; + + loff_t pos; + long idx; + + cpumask_var_t started; +}; + + +typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, + int flags); +struct trace_event { + struct hlist_node node; + struct list_head list; + int type; + trace_print_func trace; + trace_print_func raw; + trace_print_func hex; + trace_print_func binary; +}; + +extern int register_ftrace_event(struct trace_event *event); +extern int unregister_ftrace_event(struct trace_event *event); + +/* Return values for print_line callback */ +enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ +}; + + +struct ring_buffer_event * +trace_current_buffer_lock_reserve(int type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_current_buffer_discard_commit(struct ring_buffer_event *event); + +void tracing_record_cmdline(struct task_struct *tsk); + +struct ftrace_event_call { + struct list_head list; + char *name; + char *system; + struct dentry *dir; + struct trace_event *event; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; + int filter_active; + void *filter; + void *mod; + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif +}; + +#define MAX_FILTER_PRED 32 +#define MAX_FILTER_STR_VAL 128 + +extern int init_preds(struct ftrace_event_call *call); +extern void destroy_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_current_check_discard(struct ftrace_event_call *call, + void *rec, + struct ring_buffer_event *event); + +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size, int is_signed); + +#define is_signed_type(type) (((type)(-1)) < 0) + +int trace_set_clr_event(const char *system, const char *event, int set); + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + +#define __common_field(type, item, is_signed) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item), is_signed); \ + if (ret) \ + return ret; + +#endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 9fed365a598b..867cb68d8461 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include <asm/io.h> #include <asm/mutex.h> +/* for request_sense */ +#include <linux/cdrom.h> + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -324,7 +327,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ @@ -360,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -377,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -593,16 +587,16 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1174,7 +1168,10 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); + +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7f54ba942429..6646bfc7b892 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -174,6 +174,7 @@ extern struct cred init_cred; INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ + INIT_TRACE_RECURSION \ } diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h new file mode 100644 index 000000000000..b616d3930c3b --- /dev/null +++ b/include/linux/kmemtrace.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include <trace/events/kmem.h> + +#ifdef CONFIG_KMEMTRACE +extern void kmemtrace_init(void); +#else +static inline void kmemtrace_init(void) +{ +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/include/linux/mm.h b/include/linux/mm.h index 5528ff32512e..ad613ed66ab0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -19,6 +19,7 @@ struct anon_vma; struct file_ra_state; struct user_struct; struct writeback_control; +struct rlimit; #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -1315,8 +1316,8 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); -extern void *alloc_locked_buffer(size_t size); -extern void free_locked_buffer(void *buffer, size_t size); -extern void release_locked_buffer(void *buffer, size_t size); +extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size); +extern void refund_locked_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 3d1b7bde1283..97491f78b08c 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -30,6 +30,8 @@ extern unsigned int kmmio_count; extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); +extern int kmmio_init(void); +extern void kmmio_cleanup(void); #ifdef CONFIG_MMIOTRACE /* kmmio is active by some kmmio_probes? */ diff --git a/include/linux/module.h b/include/linux/module.h index 627ac082e2a6..a8f2c0aa4c32 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -337,6 +337,14 @@ struct module const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; #endif +#ifdef CONFIG_EVENT_TRACING + struct ftrace_event_call *trace_events; + unsigned int num_trace_events; +#endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + unsigned long *ftrace_callsites; + unsigned int num_ftrace_callsites; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f71812d67d3..d7d1c41a0b17 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1996,10 +1996,12 @@ #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 +#define PCI_DEVICE_ID_OXSEMI_C950 0x950B #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 +#define PCI_SUBDEVICE_ID_OXSEMI_C950 0x0001 #define PCI_VENDOR_ID_CHELSIO 0x1425 diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67c15653fc23..59e133d39d50 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer); -extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_untrace(task) do { } while (0) #endif -#ifndef arch_ptrace_fork -/* - * Do machine-specific work to initialize a new task. - * - * This is called from copy_process(). - */ -#define arch_ptrace_fork(child, clone_flags) do { } while (0) -#endif - extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/include/linux/rational.h b/include/linux/rational.h new file mode 100644 index 000000000000..4f532fcd9eea --- /dev/null +++ b/include/linux/rational.h @@ -0,0 +1,19 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com> + * + * helper functions when coping with rational numbers, + * e.g. when calculating optimum numerator/denominator pairs for + * pll configuration taking into account restricted register size + */ + +#ifndef _LINUX_RATIONAL_H +#define _LINUX_RATIONAL_H + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator); + +#endif /* _LINUX_RATIONAL_H */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c97..5710f43bbc9e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } +/** + * list_entry_rcu - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_entry_rcu(ptr, type, member) \ + container_of(rcu_dereference(ptr), type, member) + +/** + * list_first_entry_rcu - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_first_entry_rcu(ptr, type, member) \ + list_entry_rcu((ptr)->next, type, member) + #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ @@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 58b2aa5312b9..5a5153806c42 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -161,8 +161,15 @@ struct rcu_data { unsigned long offline_fqs; /* Kicked due to being offline. */ unsigned long resched_ipi; /* Sent a resched IPI. */ - /* 5) For future __rcu_pending statistics. */ + /* 5) __rcu_pending() statistics. */ long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; int cpu; }; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e1b7b2173885..8670f1575fe1 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -11,7 +11,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - u32 type:2, len:3, time_delta:27; + u32 type_len:5, time_delta:27; u32 array[]; }; @@ -24,7 +24,8 @@ struct ring_buffer_event { * size is variable depending on how much * padding is needed * If time_delta is non zero: - * everything else same as RINGBUF_TYPE_DATA + * array[0] holds the actual length + * size = 4 + length (bytes) * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -35,22 +36,23 @@ struct ring_buffer_event { * array[1..2] = tv_sec * size = 16 bytes * - * @RINGBUF_TYPE_DATA: Data record - * If len is zero: + * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: + * Data record + * If type_len is zero: * array[0] holds the actual length * array[1..(length+3)/4] holds data - * size = 4 + 4 + length (bytes) + * size = 4 + length (bytes) * else - * length = len << 2 + * length = type_len << 2 * array[0..(length+3)/4-1] holds data * size = 4 + length (bytes) */ enum ring_buffer_type { + RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, RINGBUF_TYPE_PADDING, RINGBUF_TYPE_TIME_EXTEND, /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ RINGBUF_TYPE_TIME_STAMP, - RINGBUF_TYPE_DATA, }; unsigned ring_buffer_event_length(struct ring_buffer_event *event); @@ -68,13 +70,54 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +/* + * ring_buffer_event_discard can discard any event in the ring buffer. + * it is up to the caller to protect against a reader from + * consuming it or a writer from wrapping and replacing it. + * + * No external protection is needed if this is called before + * the event is commited. But in that case it would be better to + * use ring_buffer_discard_commit. + * + * Note, if an event that has not been committed is discarded + * with ring_buffer_event_discard, it must still be committed. + */ void ring_buffer_event_discard(struct ring_buffer_event *event); /* + * ring_buffer_discard_commit will remove an event that has not + * ben committed yet. If this is used, then ring_buffer_unlock_commit + * must not be called on the discarded event. This function + * will try to remove the event from the ring buffer completely + * if another event has not been written after it. + * + * Example use: + * + * if (some_condition) + * ring_buffer_discard_commit(buffer, event); + * else + * ring_buffer_unlock_commit(buffer, event); + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); + +/* * size is in bytes for each per CPU buffer. */ struct ring_buffer * -ring_buffer_alloc(unsigned long size, unsigned flags); +__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); + +/* + * Because the ring buffer is generic, if other users of the ring buffer get + * traced by ftrace, it can produce lockdep warnings. We need to keep each + * ring buffer's lock class separate. + */ +#define ring_buffer_alloc(size, flags) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc((size), (flags), &__key); \ +}) + void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); @@ -122,6 +165,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, @@ -137,6 +182,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); +struct trace_seq; + +int ring_buffer_print_entry_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_seq *s); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; diff --git a/include/linux/sched.h b/include/linux/sched.h index ff687281f233..42bf2766111e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -77,6 +77,7 @@ struct sched_param { #include <linux/proportions.h> #include <linux/seccomp.h> #include <linux/rcupdate.h> +#include <linux/rculist.h> #include <linux/rtmutex.h> #include <linux/time.h> @@ -96,8 +97,8 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; -struct bts_tracer; struct fs_struct; +struct bts_context; /* * List of flags we want to share for kernel threads, @@ -1229,18 +1230,11 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; -#ifdef CONFIG_X86_PTRACE_BTS /* * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ - struct bts_tracer *bts; - /* - * The buffer to hold the BTS data. - */ - void *bts_buffer; - size_t bts_size; -#endif /* CONFIG_X86_PTRACE_BTS */ + struct bts_context *bts; /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; @@ -1450,7 +1444,9 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; -#endif + /* bitmask of trace recursion */ + unsigned long trace_recursion; +#endif /* CONFIG_TRACING */ }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ @@ -2024,8 +2020,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +extern void wait_task_context_switch(struct task_struct *p); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void wait_task_context_switch(struct task_struct *p) {} static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { @@ -2033,7 +2031,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) @@ -2072,8 +2071,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2) static inline struct task_struct *next_thread(const struct task_struct *p) { - return list_entry(rcu_dereference(p->thread_group.next), - struct task_struct, thread_group); + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); } static inline int thread_group_empty(struct task_struct *p) diff --git a/include/linux/serial.h b/include/linux/serial.h index 9136cc5608c3..e5bb75a63802 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -96,54 +96,76 @@ struct serial_uart_config { /* * Definitions for async_struct (and serial_struct) flags field + * + * Define ASYNCB_* for convenient use with {test,set,clear}_bit. */ -#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes - on the callout port */ -#define ASYNC_FOURPORT 0x0002 /* Set OU1, OUT2 per AST Fourport settings */ -#define ASYNC_SAK 0x0004 /* Secure Attention Key (Orange book) */ -#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ - -#define ASYNC_SPD_MASK 0x1030 -#define ASYNC_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ - -#define ASYNC_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ -#define ASYNC_SPD_CUST 0x0030 /* Use user-specified divisor */ - -#define ASYNC_SKIP_TEST 0x0040 /* Skip UART test during autoconfiguration */ -#define ASYNC_AUTO_IRQ 0x0080 /* Do automatic IRQ during autoconfiguration */ -#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ -#define ASYNC_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ -#define ASYNC_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ - -#define ASYNC_HARDPPS_CD 0x0800 /* Call hardpps when CD goes high */ - -#define ASYNC_SPD_SHI 0x1000 /* Use 230400 instead of 38400 bps */ -#define ASYNC_SPD_WARP 0x1010 /* Use 460800 instead of 38400 bps */ - -#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */ - -#define ASYNC_BUGGY_UART 0x4000 /* This is a buggy UART, skip some safety - * checks. Note: can be dangerous! */ - -#define ASYNC_AUTOPROBE 0x8000 /* Port was autoprobed by PCI or PNP code */ - -#define ASYNC_FLAGS 0x7FFF /* Possible legal async flags */ -#define ASYNC_USR_MASK 0x3430 /* Legal flags that non-privileged - * users can set or reset */ - -/* Internal flags used only by kernel/chr_drv/serial.c */ -#define ASYNC_INITIALIZED 0x80000000 /* Serial port was initialized */ -#define ASYNC_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ -#define ASYNC_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ -#define ASYNC_CLOSING 0x08000000 /* Serial port is closing */ -#define ASYNC_CTS_FLOW 0x04000000 /* Do CTS flow control */ -#define ASYNC_CHECK_CD 0x02000000 /* i.e., CLOCAL */ -#define ASYNC_SHARE_IRQ 0x01000000 /* for multifunction cards - --- no longer used */ -#define ASYNC_CONS_FLOW 0x00800000 /* flow control for console */ - -#define ASYNC_BOOT_ONLYMCA 0x00400000 /* Probe only if MCA bus */ -#define ASYNC_INTERNAL_FLAGS 0xFFC00000 /* Internal flags */ +#define ASYNCB_HUP_NOTIFY 0 /* Notify getty on hangups and closes + * on the callout port */ +#define ASYNCB_FOURPORT 1 /* Set OU1, OUT2 per AST Fourport settings */ +#define ASYNCB_SAK 2 /* Secure Attention Key (Orange book) */ +#define ASYNCB_SPLIT_TERMIOS 3 /* Separate termios for dialin/callout */ +#define ASYNCB_SPD_HI 4 /* Use 56000 instead of 38400 bps */ +#define ASYNCB_SPD_VHI 5 /* Use 115200 instead of 38400 bps */ +#define ASYNCB_SKIP_TEST 6 /* Skip UART test during autoconfiguration */ +#define ASYNCB_AUTO_IRQ 7 /* Do automatic IRQ during + * autoconfiguration */ +#define ASYNCB_SESSION_LOCKOUT 8 /* Lock out cua opens based on session */ +#define ASYNCB_PGRP_LOCKOUT 9 /* Lock out cua opens based on pgrp */ +#define ASYNCB_CALLOUT_NOHUP 10 /* Don't do hangups for cua device */ +#define ASYNCB_HARDPPS_CD 11 /* Call hardpps when CD goes high */ +#define ASYNCB_SPD_SHI 12 /* Use 230400 instead of 38400 bps */ +#define ASYNCB_LOW_LATENCY 13 /* Request low latency behaviour */ +#define ASYNCB_BUGGY_UART 14 /* This is a buggy UART, skip some safety + * checks. Note: can be dangerous! */ +#define ASYNCB_AUTOPROBE 15 /* Port was autoprobed by PCI or PNP code */ +#define ASYNCB_LAST_USER 15 + +/* Internal flags used only by kernel */ +#define ASYNCB_INITIALIZED 31 /* Serial port was initialized */ +#define ASYNCB_NORMAL_ACTIVE 29 /* Normal device is active */ +#define ASYNCB_BOOT_AUTOCONF 28 /* Autoconfigure port on bootup */ +#define ASYNCB_CLOSING 27 /* Serial port is closing */ +#define ASYNCB_CTS_FLOW 26 /* Do CTS flow control */ +#define ASYNCB_CHECK_CD 25 /* i.e., CLOCAL */ +#define ASYNCB_SHARE_IRQ 24 /* for multifunction cards, no longer used */ +#define ASYNCB_CONS_FLOW 23 /* flow control for console */ +#define ASYNCB_BOOT_ONLYMCA 22 /* Probe only if MCA bus */ +#define ASYNCB_FIRST_KERNEL 22 + +#define ASYNC_HUP_NOTIFY (1U << ASYNCB_HUP_NOTIFY) +#define ASYNC_FOURPORT (1U << ASYNCB_FOURPORT) +#define ASYNC_SAK (1U << ASYNCB_SAK) +#define ASYNC_SPLIT_TERMIOS (1U << ASYNCB_SPLIT_TERMIOS) +#define ASYNC_SPD_HI (1U << ASYNCB_SPD_HI) +#define ASYNC_SPD_VHI (1U << ASYNCB_SPD_VHI) +#define ASYNC_SKIP_TEST (1U << ASYNCB_SKIP_TEST) +#define ASYNC_AUTO_IRQ (1U << ASYNCB_AUTO_IRQ) +#define ASYNC_SESSION_LOCKOUT (1U << ASYNCB_SESSION_LOCKOUT) +#define ASYNC_PGRP_LOCKOUT (1U << ASYNCB_PGRP_LOCKOUT) +#define ASYNC_CALLOUT_NOHUP (1U << ASYNCB_CALLOUT_NOHUP) +#define ASYNC_HARDPPS_CD (1U << ASYNCB_HARDPPS_CD) +#define ASYNC_SPD_SHI (1U << ASYNCB_SPD_SHI) +#define ASYNC_LOW_LATENCY (1U << ASYNCB_LOW_LATENCY) +#define ASYNC_BUGGY_UART (1U << ASYNCB_BUGGY_UART) +#define ASYNC_AUTOPROBE (1U << ASYNCB_AUTOPROBE) + +#define ASYNC_FLAGS ((1U << ASYNCB_LAST_USER) - 1) +#define ASYNC_USR_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI| \ + ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY) +#define ASYNC_SPD_CUST (ASYNC_SPD_HI|ASYNC_SPD_VHI) +#define ASYNC_SPD_WARP (ASYNC_SPD_HI|ASYNC_SPD_SHI) +#define ASYNC_SPD_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI) + +#define ASYNC_INITIALIZED (1U << ASYNCB_INITIALIZED) +#define ASYNC_NORMAL_ACTIVE (1U << ASYNCB_NORMAL_ACTIVE) +#define ASYNC_BOOT_AUTOCONF (1U << ASYNCB_BOOT_AUTOCONF) +#define ASYNC_CLOSING (1U << ASYNCB_CLOSING) +#define ASYNC_CTS_FLOW (1U << ASYNCB_CTS_FLOW) +#define ASYNC_CHECK_CD (1U << ASYNCB_CHECK_CD) +#define ASYNC_SHARE_IRQ (1U << ASYNCB_SHARE_IRQ) +#define ASYNC_CONS_FLOW (1U << ASYNCB_CONS_FLOW) +#define ASYNC_BOOT_ONLYMCA (1U << ASYNCB_BOOT_ONLYMCA) +#define ASYNC_INTERNAL_FLAGS (~((1U << ASYNCB_FIRST_KERNEL) - 1)) /* * Multiport serial configuration structure --- external structure diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 57a97e52e58d..6fd80c4243f1 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -41,7 +41,8 @@ #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ -#define PORT_MAX_8250 17 /* max port ID */ +#define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ +#define PORT_MAX_8250 18 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed @@ -167,6 +168,9 @@ /* MAX3100 */ #define PORT_MAX3100 86 +/* Timberdale UART */ +#define PORT_TIMBUART 87 + #ifdef __KERNEL__ #include <linux/compiler.h> diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f8aa53..c7552836bd95 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); +extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); extern int show_unhandled_signals; diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 5ac9b0bcaf9a..713f841ecaa9 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ #include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include <linux/compiler.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> /* Size description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 5046f90c1171..be5d40c43bd2 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include <linux/gfp.h> #include <linux/workqueue.h> #include <linux/kobject.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h new file mode 100644 index 000000000000..c68bccba2074 --- /dev/null +++ b/include/linux/trace_seq.h @@ -0,0 +1,92 @@ +#ifndef _LINUX_TRACE_SEQ_H +#define _LINUX_TRACE_SEQ_H + +#include <linux/fs.h> + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE. + */ + +struct trace_seq { + unsigned char buffer[PAGE_SIZE]; + unsigned int len; + unsigned int readpos; +}; + +static inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + +/* + * Currently only defined when tracing is enabled. + */ +#ifdef CONFIG_TRACING +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) + __attribute__ ((format (printf, 2, 0))); +extern int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); +extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); +extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len); +extern void *trace_seq_reserve(struct trace_seq *s, size_t len); +extern int trace_seq_path(struct trace_seq *s, struct path *path); + +#else /* CONFIG_TRACING */ +static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +{ + return 0; +} +static inline int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + return 0; +} + +static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ +} +static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt) +{ + return 0; +} +static inline int trace_seq_puts(struct trace_seq *s, const char *str) +{ + return 0; +} +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + return 0; +} +static inline int +trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +{ + return 0; +} +static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len) +{ + return 0; +} +static inline void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + return NULL; +} +static inline int trace_seq_path(struct trace_seq *s, struct path *path) +{ + return 0; +} +#endif /* CONFIG_TRACING */ + +#endif /* _LINUX_TRACE_SEQ_H */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d35a7ee7611f..14df7e635d43 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,6 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +#ifndef DECLARE_TRACE + #define TP_PROTO(args...) args #define TP_ARGS(args...) args @@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } #endif /* CONFIG_TRACEPOINTS */ +#endif /* DECLARE_TRACE */ /* * Connect a probe to a tracepoint. @@ -154,10 +157,8 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args -#define TRACE_FORMAT(name, proto, args, fmt) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) - +#ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: * @@ -262,5 +263,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif #endif diff --git a/include/linux/tty.h b/include/linux/tty.h index fc39db95499f..1488d8c81aac 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -185,7 +185,7 @@ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); - void (*raise_dtr_rts)(struct tty_port *port); + void (*dtr_rts)(struct tty_port *port, int raise); }; struct tty_port { @@ -201,6 +201,9 @@ struct tty_port { unsigned char *xmit_buf; /* Optional buffer */ int close_delay; /* Close port delay */ int closing_wait; /* Delay for output */ + int drain_delay; /* Set to zero if no pure time + based drain is needed else + set to size of fifo */ }; /* @@ -223,8 +226,11 @@ struct tty_struct { struct tty_driver *driver; const struct tty_operations *ops; int index; - /* The ldisc objects are protected by tty_ldisc_lock at the moment */ - struct tty_ldisc ldisc; + + /* Protects ldisc changes: Lock tty not pty */ + struct mutex ldisc_mutex; + struct tty_ldisc *ldisc; + struct mutex termios_mutex; spinlock_t ctrl_lock; /* Termios values are protected by the termios mutex */ @@ -311,6 +317,7 @@ struct tty_struct { #define TTY_CLOSING 7 /* ->close() in progress */ #define TTY_LDISC 9 /* Line discipline attached */ #define TTY_LDISC_CHANGING 10 /* Line discipline changing */ +#define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ #define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ #define TTY_PTY_LOCK 16 /* pty private */ @@ -403,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); +extern void tty_ldisc_hangup(struct tty_struct *tty); extern const struct file_operations tty_ldiscs_proc_fops; extern void tty_wakeup(struct tty_struct *tty); @@ -425,6 +433,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx, extern void tty_release_dev(struct file *filp); extern int tty_init_termios(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); + extern struct mutex tty_mutex; extern void tty_write_unlock(struct tty_struct *tty); @@ -438,6 +449,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); +extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index bcba84ea2d86..3566129384a4 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -127,7 +127,8 @@ * the line discipline are close to full, and it should somehow * signal that no more characters should be sent to the tty. * - * Optional: Always invoke via tty_throttle(); + * Optional: Always invoke via tty_throttle(), called under the + * termios lock. * * void (*unthrottle)(struct tty_struct * tty); * @@ -135,7 +136,8 @@ * that characters can now be sent to the tty without fear of * overrunning the input buffers of the line disciplines. * - * Optional: Always invoke via tty_unthrottle(); + * Optional: Always invoke via tty_unthrottle(), called under the + * termios lock. * * void (*stop)(struct tty_struct *tty); * diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 625e9e4639c6..8cdfed738fe4 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -224,8 +224,7 @@ struct usb_serial_driver { /* Called by console with tty = NULL and by tty */ int (*open)(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); - void (*close)(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); + void (*close)(struct usb_serial_port *port); int (*write)(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); /* Called only by the tty layer */ @@ -241,6 +240,10 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); + /* Called by the tty layer for port level work. There may or may not + be an attached tty at this point */ + void (*dtr_rts)(struct usb_serial_port *port, int on); + int (*carrier_raised)(struct usb_serial_port *port); /* USB events */ void (*read_int_callback)(struct urb *urb); void (*write_int_callback)(struct urb *urb); @@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); extern int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); -extern void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +extern void usb_serial_generic_close(struct usb_serial_port *port); extern int usb_serial_generic_resume(struct usb_serial *serial); extern int usb_serial_generic_write_room(struct tty_struct *tty); extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); diff --git a/include/trace/block.h b/include/trace/block.h deleted file mode 100644 index 25b7068b819e..000000000000 --- a/include/trace/block.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _TRACE_BLOCK_H -#define _TRACE_BLOCK_H - -#include <linux/blkdev.h> -#include <linux/tracepoint.h> - -DECLARE_TRACE(block_rq_abort, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_bio_bounce, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_queue, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_getrq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_sleeprq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_plug, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_timer, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_io, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TP_ARGS(q, bio, pdu)); - -DECLARE_TRACE(block_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TP_ARGS(q, bio, dev, from, to)); - -#endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h new file mode 100644 index 000000000000..f7a7ae1e8f90 --- /dev/null +++ b/include/trace/define_trace.h @@ -0,0 +1,75 @@ +/* + * Trace files that want to automate creationg of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. + */ + +#ifdef CREATE_TRACE_POINTS + +/* Prevent recursion */ +#undef CREATE_TRACE_POINTS + +#include <linux/stringify.h> + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + DEFINE_TRACE(name) + +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) \ + DEFINE_TRACE(name) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) <trace/events/system.h> +# define UNDEF_TRACE_INCLUDE_PATH +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_HEADER_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#ifdef CONFIG_EVENT_TRACING +#include <trace/ftrace.h> +#endif + +#undef TRACE_HEADER_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_FILE +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_PATH +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_PATH +#endif + +/* We may be processing more files */ +#define CREATE_TRACE_POINTS + +#endif /* CREATE_TRACE_POINTS */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h new file mode 100644 index 000000000000..53effd496a50 --- /dev/null +++ b/include/trace/events/block.h @@ -0,0 +1,498 @@ +#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BLOCK_H + +#include <linux/blktrace_api.h> +#include <linux/blkdev.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM block + +TRACE_EVENT(block_rq_abort, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_insert, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_issue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_requeue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) +); +TRACE_EVENT(block_bio_bounce, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_complete, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __field( int, error ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->error) +); + +TRACE_EVENT(block_bio_backmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_frontmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_queue, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_getrq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_sleeprq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_plug, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s]", __entry->comm) +); + +TRACE_EVENT(block_unplug_timer, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_unplug_io, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_split, + + TP_PROTO(struct request_queue *q, struct bio *bio, + unsigned int new_sector), + + TP_ARGS(q, bio, new_sector), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( sector_t, new_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->new_sector = new_sector; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu / %llu [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + (unsigned long long)__entry->new_sector, + __entry->comm) +); + +TRACE_EVENT(block_remap, + + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from), + + TP_ARGS(q, bio, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + (unsigned long long)__entry->old_sector) +); + +#endif /* _TRACE_BLOCK_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> + diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h new file mode 100644 index 000000000000..b0c7ede55eb1 --- /dev/null +++ b/include/trace/events/irq.h @@ -0,0 +1,145 @@ +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_H + +#include <linux/tracepoint.h> +#include <linux/interrupt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI), \ + softirq_name(TIMER), \ + softirq_name(NET_TX), \ + softirq_name(NET_RX), \ + softirq_name(BLOCK), \ + softirq_name(TASKLET), \ + softirq_name(SCHED), \ + softirq_name(HRTIMER), \ + softirq_name(RCU)) + +/** + * irq_handler_entry - called immediately before the irq action handler + * @irq: irq number + * @action: pointer to struct irqaction + * + * The struct irqaction pointed to by @action contains various + * information about the handler, including the device name, + * @action->name, and the device id, @action->dev_id. When used in + * conjunction with the irq_handler_exit tracepoint, we can figure + * out irq handler latencies. + */ +TRACE_EVENT(irq_handler_entry, + + TP_PROTO(int irq, struct irqaction *action), + + TP_ARGS(irq, action), + + TP_STRUCT__entry( + __field( int, irq ) + __string( name, action->name ) + ), + + TP_fast_assign( + __entry->irq = irq; + __assign_str(name, action->name); + ), + + TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) +); + +/** + * irq_handler_exit - called immediately after the irq action handler returns + * @irq: irq number + * @action: pointer to struct irqaction + * @ret: return value + * + * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding + * @action->handler scuccessully handled this irq. Otherwise, the irq might be + * a shared irq line, or the irq was not handled successfully. Can be used in + * conjunction with the irq_handler_entry to understand irq handler latencies. + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ +TRACE_EVENT(softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec), + + TP_STRUCT__entry( + __field( int, vec ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) +); + +/** + * softirq_exit - called immediately after the softirq handler returns + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the struct softirq_action + * that has handled the softirq. By subtracting the @vec pointer from + * the @h pointer, we can determine the softirq number. Also, when used in + * combination with the softirq_entry tracepoint we can determine the softirq + * latency. + */ +TRACE_EVENT(softirq_exit, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec), + + TP_STRUCT__entry( + __field( int, vec ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) +); + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h new file mode 100644 index 000000000000..9baba50d6512 --- /dev/null +++ b/include/trace/events/kmem.h @@ -0,0 +1,231 @@ +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KMEM_H + +#include <linux/types.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +/* + * The order of these masks is important. Matching masks will be seen + * first and the left over flags will end up showing by themselves. + * + * For example, if we have GFP_KERNEL before GFP_USER we wil get: + * + * GFP_KERNEL|GFP_HARDWALL + * + * Thus most bits set go first. + */ +#define show_gfp_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ + {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ + {(unsigned long)GFP_USER, "GFP_USER"}, \ + {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ + {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ + {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ + {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ + {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ + {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \ + {(unsigned long)__GFP_IO, "GFP_IO"}, \ + {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ + ) : "GFP_NOWAIT" + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags)) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags)) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags), + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags), + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); +#endif /* _TRACE_KMEM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h new file mode 100644 index 000000000000..0e956c9dfd7e --- /dev/null +++ b/include/trace/events/lockdep.h @@ -0,0 +1,96 @@ +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKDEP_H + +#include <linux/lockdep.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + +#ifdef CONFIG_LOCKDEP + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + + TP_ARGS(lock, nested, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +#ifdef CONFIG_LOCK_STAT + +TRACE_EVENT(lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __string(name, lock->name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __assign_str(name, lock->name); + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/sched_event_types.h b/include/trace/events/sched.h index 63547dc1125f..24ab5bcff7b2 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/events/sched.h @@ -1,9 +1,8 @@ +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_H -/* use <trace/sched.h> instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif +#include <linux/sched.h> +#include <linux/tracepoint.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM sched @@ -157,6 +156,7 @@ TRACE_EVENT(sched_switch, __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) + __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) @@ -166,13 +166,19 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; + __entry->prev_state = prev->state; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->prev_state ? + __print_flags(__entry->prev_state, "|", + { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, + { 16, "Z" }, { 32, "X" }, { 64, "x" }, + { 128, "W" }) : "R", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); @@ -181,9 +187,9 @@ TRACE_EVENT(sched_switch, */ TRACE_EVENT(sched_migrate_task, - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + TP_PROTO(struct task_struct *p, int dest_cpu), - TP_ARGS(p, orig_cpu, dest_cpu), + TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -197,7 +203,7 @@ TRACE_EVENT(sched_migrate_task, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; + __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), @@ -334,4 +340,7 @@ TRACE_EVENT(sched_signal_send, __entry->sig, __entry->comm, __entry->pid) ); -#undef TRACE_SYSTEM +#endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h new file mode 100644 index 000000000000..1e8fabb57c06 --- /dev/null +++ b/include/trace/events/skb.h @@ -0,0 +1,40 @@ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H + +#include <linux/skbuff.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 000000000000..035f1bff288e --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,100 @@ +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include <linux/workqueue.h> +#include <linux/sched.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +TRACE_EVENT(workqueue_insertion, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +TRACE_EVENT(workqueue_execution, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +/* Trace the creation of one workqueue thread on a cpu */ +TRACE_EVENT(workqueue_creation, + + TP_PROTO(struct task_struct *wq_thread, int cpu), + + TP_ARGS(wq_thread, cpu), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(int, cpu) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->cpu = cpu; + ), + + TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm, + __entry->thread_pid, __entry->cpu) +); + +TRACE_EVENT(workqueue_destruction, + + TP_PROTO(struct task_struct *wq_thread), + + TP_ARGS(wq_thread), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + ), + + TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid) +); + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h new file mode 100644 index 000000000000..1867553c61e5 --- /dev/null +++ b/include/trace/ftrace.h @@ -0,0 +1,591 @@ +/* + * Stage 1 of the trace events. + * + * Override the macros in <trace/trace_events.h> to include the following: + * + * struct ftrace_raw_<call> { + * struct trace_entry ent; + * <type> <item>; + * <type2> <item2>[<len>]; + * [...] + * }; + * + * The <type> <item> is created by the __field(type, item) macro or + * the __array(type2, item2, len) macro. + * We simply do "type item;", and that will create the fields + * in the structure. + */ + +#include <linux/ftrace_event.h> + +#undef __field +#define __field(type, item) type item; + +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) unsigned short __data_loc_##item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; \ + static struct ftrace_event_call event_##name + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + + +/* + * Stage 2 of the trace events. + * + * Include the following: + * + * struct ftrace_data_offsets_<call> { + * int <item1>; + * int <item2>; + * [...] + * }; + * + * The __dynamic_array() macro will create each int <item>, this is + * to keep the offset of each array from the beginning of the event. + */ + +#undef __field +#define __field(type, item); + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) int item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + struct ftrace_data_offsets_##call { \ + tstruct; \ + }; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " offset:%u; size:%u;\n", + * offsetof(struct ftrace_raw_##call, item), + * sizeof(field.type)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), \ + __data_loc_##item), \ + (unsigned int)sizeof(field.__data_loc_##item)); \ + if (!ret) \ + return 0; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __entry +#define __entry REC + +#undef __print_symbolic +#undef __get_dynamic_array +#undef __get_str + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field __attribute__((unused)); \ + int ret = 0; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * + * Override the macros in <trace/trace_events.h> to include the following: + * + * enum print_line_t + * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags) + * { + * struct trace_seq *s = &iter->seq; + * struct ftrace_raw_<call> *field; <-- defined in stage 1 + * struct trace_entry *entry; + * struct trace_seq *p; + * int ret; + * + * entry = iter->ent; + * + * if (entry->type != event_<call>.id) { + * WARN_ON_ONCE(1); + * return TRACE_TYPE_UNHANDLED; + * } + * + * field = (typeof(field))entry; + * + * p = get_cpu_var(ftrace_event_seq); + * trace_seq_init(p); + * ret = trace_seq_printf(s, <TP_printk> "\n"); + * put_cpu(); + * if (!ret) + * return TRACE_TYPE_PARTIAL_LINE; + * + * return TRACE_TYPE_HANDLED; + * } + * + * This is the method used to print the raw event to the trace + * output format. Note, this is not needed if the data is read + * in binary. + */ + +#undef __entry +#define __entry field + +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + __entry->__data_loc_##field) + +#undef __get_str +#define __get_str(field) (char *)__get_dynamic_array(field) + +#undef __print_flags +#define __print_flags(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags flags[] = \ + { flag_array, { -1, NULL }}; \ + ftrace_print_flags_seq(p, delim, flag, flags); \ + }) + +#undef __print_symbolic +#define __print_symbolic(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags symbols[] = \ + { symbol_array, { -1, NULL }}; \ + ftrace_print_symbols_seq(p, value, symbols); \ + }) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##call *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, #call ": " print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), 0); \ + if (ret) \ + return ret; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\ + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0); + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_raw_##call field; \ + struct ftrace_event_call *event_call = &event_##call; \ + int ret; \ + \ + __common_field(int, type, 1); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ + \ + tstruct; \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * remember the offset of each array from the beginning of the event. + */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data); \ + __data_size += (len) * sizeof(type); + +#undef __string +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static inline int ftrace_get_offsets_##call( \ + struct ftrace_data_offsets_##call *__data_offsets, proto) \ +{ \ + int __data_size = 0; \ + struct ftrace_raw_##call __maybe_unused *entry; \ + \ + tstruct; \ + \ + return __data_size; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 4 of the trace events. + * + * Override the macros in <trace/trace_events.h> to include the following: + * + * static void ftrace_event_<call>(proto) + * { + * event_trace_printk(_RET_IP_, "<call>: " <fmt>); + * } + * + * static int ftrace_reg_event_<call>(void) + * { + * int ret; + * + * ret = register_trace_<call>(ftrace_event_<call>); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to <call>"); + * return ret; + * } + * + * static void ftrace_unreg_event_<call>(void) + * { + * unregister_trace_<call>(ftrace_event_<call>); + * } + * + * + * For those macros defined with TRACE_EVENT: + * + * static struct ftrace_event_call event_<call>; + * + * static void ftrace_raw_event_<call>(proto) + * { + * struct ring_buffer_event *event; + * struct ftrace_raw_<call> *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_<call>.id, + * sizeof(struct ftrace_raw_<call>), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * <assign>; <-- Here we assign the entries by the __field and + * __array macros. + * + * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * } + * + * static int ftrace_raw_reg_event_<call>(void) + * { + * int ret; + * + * ret = register_trace_<call>(ftrace_raw_event_<call>); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to <call>"); + * return ret; + * } + * + * static void ftrace_unreg_event_<call>(void) + * { + * unregister_trace_<call>(ftrace_raw_event_<call>); + * } + * + * static struct trace_event ftrace_event_type_<call> = { + * .trace = ftrace_raw_output_<call>, <-- stage 2 + * }; + * + * static int ftrace_raw_init_event_<call>(void) + * { + * int id; + * + * id = register_ftrace_event(&ftrace_event_type_<call>); + * if (!id) + * return -ENODEV; + * event_<call>.id = id; + * return 0; + * } + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_<call> = { + * .name = "<call>", + * .system = "<system>", + * .raw_init = ftrace_raw_init_event_<call>, + * .regfunc = ftrace_reg_event_<call>, + * .unregfunc = ftrace_unreg_event_<call>, + * .show_format = ftrace_format_<call>, + * } + * + */ + +#undef TP_FMT +#define TP_FMT(fmt, args...) fmt "\n", ##args + +#ifdef CONFIG_EVENT_PROFILE +#define _TRACE_PROFILE(call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int); \ + perf_tpcounter_event(event_##call.id); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&event_call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#define _TRACE_PROFILE_INIT(call) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = ftrace_profile_enable_##call, \ + .profile_disable = ftrace_profile_disable_##call, + +#else +#define _TRACE_PROFILE(call, proto, args) +#define _TRACE_PROFILE_INIT(call) +#endif + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __entry->__data_loc_##item = __data_offsets.item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) \ + +#undef __assign_str +#define __assign_str(dst, src) \ + strcpy(__get_str(dst), src); + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ + \ +static struct ftrace_event_call event_##call; \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + struct ftrace_event_call *event_call = &event_##call; \ + struct ring_buffer_event *event; \ + struct ftrace_raw_##call *entry; \ + unsigned long irq_flags; \ + int __data_size; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ + \ + event = trace_current_buffer_lock_reserve(event_##call.id, \ + sizeof(*entry) + __data_size, \ + irq_flags, pc); \ + if (!event) \ + return; \ + entry = ring_buffer_event_data(event); \ + \ + \ + tstruct \ + \ + { assign; } \ + \ + if (!filter_current_check_discard(event_call, entry, event)) \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ +} \ + \ +static int ftrace_raw_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_raw_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_raw_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_raw_event_##call); \ +} \ + \ +static struct trace_event ftrace_event_type_##call = { \ + .trace = ftrace_raw_output_##call, \ +}; \ + \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(&ftrace_event_type_##call); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ + return 0; \ +} \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef _TRACE_PROFILE +#undef _TRACE_PROFILE_INIT + diff --git a/include/trace/irq.h b/include/trace/irq.h deleted file mode 100644 index ff5d4495dc37..000000000000 --- a/include/trace/irq.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _TRACE_IRQ_H -#define _TRACE_IRQ_H - -#include <linux/interrupt.h> -#include <linux/tracepoint.h> - -#include <trace/irq_event_types.h> - -#endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h deleted file mode 100644 index 85964ebd47ec..000000000000 --- a/include/trace/irq_event_types.h +++ /dev/null @@ -1,55 +0,0 @@ - -/* use <trace/irq.h> instead */ -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#undef TRACE_SYSTEM diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h deleted file mode 100644 index 28ee69f9cd46..000000000000 --- a/include/trace/kmemtrace.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include <linux/tracepoint.h> -#include <linux/types.h> - -#ifdef CONFIG_KMEMTRACE -extern void kmemtrace_init(void); -#else -static inline void kmemtrace_init(void) -{ -} -#endif - -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h deleted file mode 100644 index 5ca67df87f2a..000000000000 --- a/include/trace/lockdep.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _TRACE_LOCKDEP_H -#define _TRACE_LOCKDEP_H - -#include <linux/lockdep.h> -#include <linux/tracepoint.h> - -#include <trace/lockdep_event_types.h> - -#endif diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h deleted file mode 100644 index adccfcd2ec8f..000000000000 --- a/include/trace/lockdep_event_types.h +++ /dev/null @@ -1,44 +0,0 @@ - -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lock - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_FORMAT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -#endif -#endif - -#undef TRACE_SYSTEM diff --git a/include/trace/sched.h b/include/trace/sched.h deleted file mode 100644 index 4e372a1a29bf..000000000000 --- a/include/trace/sched.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _TRACE_SCHED_H -#define _TRACE_SCHED_H - -#include <linux/sched.h> -#include <linux/tracepoint.h> - -#include <trace/sched_event_types.h> - -#endif diff --git a/include/trace/skb.h b/include/trace/skb.h deleted file mode 100644 index b66206d9be72..000000000000 --- a/include/trace/skb.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _TRACE_SKB_H_ -#define _TRACE_SKB_H_ - -#include <linux/skbuff.h> -#include <linux/tracepoint.h> - -DECLARE_TRACE(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), - TP_ARGS(skb, location)); - -#endif diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h deleted file mode 100644 index df56f5694be6..000000000000 --- a/include/trace/trace_event_types.h +++ /dev/null @@ -1,5 +0,0 @@ -/* trace/<type>_event_types.h here */ - -#include <trace/sched_event_types.h> -#include <trace/irq_event_types.h> -#include <trace/lockdep_event_types.h> diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h deleted file mode 100644 index fd13750ca4ba..000000000000 --- a/include/trace/trace_events.h +++ /dev/null @@ -1,5 +0,0 @@ -/* trace/<type>.h here */ - -#include <trace/sched.h> -#include <trace/irq.h> -#include <trace/lockdep.h> diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h deleted file mode 100644 index 7626523deeba..000000000000 --- a/include/trace/workqueue.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __TRACE_WORKQUEUE_H -#define __TRACE_WORKQUEUE_H - -#include <linux/tracepoint.h> -#include <linux/workqueue.h> -#include <linux/sched.h> - -DECLARE_TRACE(workqueue_insertion, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -DECLARE_TRACE(workqueue_execution, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -/* Trace the creation of one workqueue thread on a cpu */ -DECLARE_TRACE(workqueue_creation, - TP_PROTO(struct task_struct *wq_thread, int cpu), - TP_ARGS(wq_thread, cpu)); - -DECLARE_TRACE(workqueue_destruction, - TP_PROTO(struct task_struct *wq_thread), - TP_ARGS(wq_thread)); - -#endif /* __TRACE_WORKQUEUE_H */ diff --git a/init/Kconfig b/init/Kconfig index 7be4d3836745..d4e9671347ee 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -308,7 +308,7 @@ menu "RCU Subsystem" choice prompt "RCU Implementation" - default CLASSIC_RCU + default TREE_RCU config CLASSIC_RCU bool "Classic RCU" diff --git a/init/main.c b/init/main.c index d721dad05dd7..bb7dc57eee36 100644 --- a/init/main.c +++ b/init/main.c @@ -64,6 +64,7 @@ #include <linux/idr.h> #include <linux/ftrace.h> #include <linux/async.h> +#include <linux/kmemtrace.h> #include <trace/boot.h> #include <asm/io.h> @@ -71,7 +72,6 @@ #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> -#include <trace/kmemtrace.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> diff --git a/ipc/sem.c b/ipc/sem.c index 16a2189e96f9..87c2b641fd7b 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk) int i; rcu_read_lock(); - un = list_entry(rcu_dereference(ulp->list_proc.next), - struct sem_undo, list_proc); + un = list_entry_rcu(ulp->list_proc.next, + struct sem_undo, list_proc); if (&un->list_proc == &ulp->list_proc) semid = -1; else diff --git a/kernel/Makefile b/kernel/Makefile index 42423665660a..a35eee3436de 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ +obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o diff --git a/kernel/compat.c b/kernel/compat.c index 42d56544460f..f6c204f07ea6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, } +asmlinkage long +compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + #ifdef __ARCH_WANT_COMPAT_SYS_TIME /* compat_time_t is a 32 bit "long" and needs to get converted. */ diff --git a/kernel/exit.c b/kernel/exit.c index 036e8d740169..51d1fe3fb7ad 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,7 +48,7 @@ #include <linux/tracehook.h> #include <linux/fs_struct.h> #include <linux/init_task.h> -#include <trace/sched.h> +#include <trace/events/sched.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -56,10 +56,6 @@ #include <asm/mmu_context.h> #include "cred-internals.h" -DEFINE_TRACE(sched_process_free); -DEFINE_TRACE(sched_process_exit); -DEFINE_TRACE(sched_process_wait); - static void exit_mm(struct task_struct * tsk); static void __unhash_process(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index 875ffbdd96d0..bb762b4dd217 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -61,7 +61,6 @@ #include <linux/proc_fs.h> #include <linux/blkdev.h> #include <linux/fs_struct.h> -#include <trace/sched.h> #include <linux/magic.h> #include <asm/pgtable.h> @@ -71,6 +70,8 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> +#include <trace/events/sched.h> + /* * Protected counters by write_lock_irq(&tasklist_lock) */ @@ -83,8 +84,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -DEFINE_TRACE(sched_process_fork); - int nr_processes(void) { int cpu; @@ -982,6 +981,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!p) goto fork_out; + ftrace_graph_init_task(p); + rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING @@ -1089,8 +1090,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif - if (unlikely(current->ptrace)) - ptrace_fork(p, clone_flags); + + p->bts = NULL; /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); @@ -1131,8 +1132,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, } } - ftrace_graph_init_task(p); - p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) @@ -1141,7 +1140,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) - goto bad_fork_free_graph; + goto bad_fork_free_pid; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -1233,7 +1232,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_graph; + goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { @@ -1268,8 +1267,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, cgroup_post_fork(p); return p; -bad_fork_free_graph: - ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 18041a254d32..a60018402f42 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -18,8 +18,8 @@ #include <linux/kernel_stat.h> #include <linux/rculist.h> #include <linux/hash.h> -#include <trace/irq.h> #include <linux/bootmem.h> +#include <trace/events/irq.h> #include "internals.h" @@ -355,9 +355,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) "but no thread function available.", irq, action->name); } -DEFINE_TRACE(irq_handler_entry); -DEFINE_TRACE(irq_handler_exit); - /** * handle_IRQ_event - irq action chain handler * @irq: the interrupt number diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ebaf8519abf..41c88fe40500 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -13,7 +13,7 @@ #include <linux/file.h> #include <linux/module.h> #include <linux/mutex.h> -#include <trace/sched.h> +#include <trace/events/sched.h> #define KTHREAD_NICE_LEVEL (-5) @@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; -DEFINE_TRACE(sched_kthread_stop); -DEFINE_TRACE(sched_kthread_stop_ret); - struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index accb40cdb12a..8bbeef996c76 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -42,12 +42,14 @@ #include <linux/hash.h> #include <linux/ftrace.h> #include <linux/stringify.h> -#include <trace/lockdep.h> #include <asm/sections.h> #include "lockdep_internals.h" +#define CREATE_TRACE_POINTS +#include <trace/events/lockdep.h> + #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; module_param(prove_locking, int, 0644); @@ -2935,8 +2937,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name, } EXPORT_SYMBOL_GPL(lock_set_class); -DEFINE_TRACE(lock_acquire); - /* * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: @@ -2963,8 +2963,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, } EXPORT_SYMBOL_GPL(lock_acquire); -DEFINE_TRACE(lock_release); - void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { @@ -3105,6 +3103,8 @@ found_it: hlock->holdtime_stamp = now; } + trace_lock_acquired(lock, ip, waittime); + stats = get_lock_stats(hlock_class(hlock)); if (waittime) { if (hlock->read) @@ -3120,8 +3120,6 @@ found_it: lock->ip = ip; } -DEFINE_TRACE(lock_contended); - void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@ -3143,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -DEFINE_TRACE(lock_acquired); - void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - trace_lock_acquired(lock, ip); - if (unlikely(!lock_stat)) return; diff --git a/kernel/module.c b/kernel/module.c index cb3887e770e2..278e9b6762bb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -18,6 +18,7 @@ */ #include <linux/module.h> #include <linux/moduleloader.h> +#include <linux/ftrace_event.h> #include <linux/init.h> #include <linux/kallsyms.h> #include <linux/fs.h> @@ -1492,9 +1493,6 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); - /* release any pointers to mcount in this module */ - ftrace_release(mod->module_core, mod->core_size); - /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); @@ -1895,11 +1893,9 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int num_mcount; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - unsigned long *mseg; mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2175,7 +2171,19 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif - +#ifdef CONFIG_EVENT_TRACING + mod->trace_events = section_objs(hdr, sechdrs, secstrings, + "_ftrace_events", + sizeof(*mod->trace_events), + &mod->num_trace_events); +#endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + /* sechdrs[0].sh_size is always zero */ + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, + "__mcount_loc", + sizeof(*mod->ftrace_callsites), + &mod->num_ftrace_callsites); +#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) @@ -2240,11 +2248,6 @@ static noinline struct module *load_module(void __user *umod, dynamic_debug_setup(debug, num_debug); } - /* sechdrs[0].sh_size is always zero */ - mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", - sizeof(*mseg), &num_mcount); - ftrace_init_module(mod, mseg, mseg + num_mcount); - err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -2305,7 +2308,6 @@ static noinline struct module *load_module(void __user *umod, cleanup: kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); - ftrace_release(mod->module_core, mod->core_size); free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 43a5a3b0be79..f6d8b8cb5e34 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -25,16 +25,6 @@ /* - * Initialize a new task whose father had been ptraced. - * - * Called from copy_process(). - */ -void ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - arch_ptrace_fork(child, clone_flags); -} - -/* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index ce97a4df64d3..beb0e659adcc 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1356,17 +1356,11 @@ static int rcu_sched_grace_period(void *arg) rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - ret = 0; + ret = 0; /* unused */ __wait_event_interruptible(rcu_ctrlblk.sched_wq, rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, ret); - /* - * Signals would prevent us from sleeping, and we cannot - * do much with them in any case. So flush them. - */ - if (ret) - flush_signals(current); couldsleepnext = 0; } while (!kthread_should_stop()); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d2a372fb0b9b..0dccfbba6d26 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending) + if (rdp->qs_pending) { + rdp->n_rp_qs_pending++; return 1; + } /* Does this CPU have callbacks ready to invoke? */ - if (cpu_has_callbacks_ready_to_invoke(rdp)) + if (cpu_has_callbacks_ready_to_invoke(rdp)) { + rdp->n_rp_cb_ready++; return 1; + } /* Has RCU gone idle with this CPU needing another grace period? */ - if (cpu_needs_another_gp(rsp, rdp)) + if (cpu_needs_another_gp(rsp, rdp)) { + rdp->n_rp_cpu_needs_gp++; return 1; + } /* Has another RCU grace period completed? */ - if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */ + if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */ + rdp->n_rp_gp_completed++; return 1; + } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */ + if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */ + rdp->n_rp_gp_started++; return 1; + } /* Has an RCU GP gone long enough to send resched IPIs &c? */ if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { + rdp->n_rp_need_fqs++; return 1; + } /* nothing to do */ + rdp->n_rp_need_nothing++; return 0; } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4b1875ba9404..fe1dcdbf1ca3 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = { .release = single_release, }; -static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir; +static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) +{ + seq_printf(m, "%3d%cnp=%ld " + "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? '!' : ' ', + rdp->n_rcu_pending, + rdp->n_rp_qs_pending, + rdp->n_rp_cb_ready, + rdp->n_rp_cpu_needs_gp, + rdp->n_rp_gp_completed, + rdp->n_rp_gp_started, + rdp->n_rp_need_fqs, + rdp->n_rp_need_nothing); +} + +static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) +{ + int cpu; + struct rcu_data *rdp; + + for_each_possible_cpu(cpu) { + rdp = rsp->rda[cpu]; + if (rdp->beenonline) + print_one_rcu_pending(m, rdp); + } +} + +static int show_rcu_pending(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + print_rcu_pendings(m, &rcu_state); + seq_puts(m, "rcu_bh:\n"); + print_rcu_pendings(m, &rcu_bh_state); + return 0; +} + +static int rcu_pending_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcu_pending, NULL); +} + +static struct file_operations rcu_pending_fops = { + .owner = THIS_MODULE, + .open = rcu_pending_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir; +static struct dentry *datadir; +static struct dentry *datadir_csv; +static struct dentry *gpdir; +static struct dentry *hierdir; +static struct dentry *rcu_pendingdir; + static int __init rcuclassic_trace_init(void) { rcudir = debugfs_create_dir("rcu", NULL); @@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void) NULL, &rcuhier_fops); if (!hierdir) goto free_out; + + rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir, + NULL, &rcu_pending_fops); + if (!rcu_pendingdir) + goto free_out; return 0; free_out: if (datadir) @@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void) debugfs_remove(datadir_csv); debugfs_remove(gpdir); debugfs_remove(hierdir); + debugfs_remove(rcu_pendingdir); debugfs_remove(rcudir); } diff --git a/kernel/sched.c b/kernel/sched.c index 076e403b9c88..14c447ae5d53 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -72,13 +72,15 @@ #include <linux/debugfs.h> #include <linux/ctype.h> #include <linux/ftrace.h> -#include <trace/sched.h> #include <asm/tlb.h> #include <asm/irq_regs.h> #include "sched_cpupri.h" +#define CREATE_TRACE_POINTS +#include <trace/events/sched.h> + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], @@ -118,12 +120,6 @@ */ #define RUNTIME_INF ((u64)~0ULL) -DEFINE_TRACE(sched_wait_task); -DEFINE_TRACE(sched_wakeup); -DEFINE_TRACE(sched_wakeup_new); -DEFINE_TRACE(sched_switch); -DEFINE_TRACE(sched_migrate_task); - #ifdef CONFIG_SMP static void double_rq_lock(struct rq *rq1, struct rq *rq2); @@ -1964,7 +1960,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) clock_offset = old_rq->clock - new_rq->clock; - trace_sched_migrate_task(p, task_cpu(p), new_cpu); + trace_sched_migrate_task(p, new_cpu); #ifdef CONFIG_SCHEDSTATS if (p->se.wait_start) @@ -2021,6 +2017,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) } /* + * wait_task_context_switch - wait for a thread to complete at least one + * context switch. + * + * @p must not be current. + */ +void wait_task_context_switch(struct task_struct *p) +{ + unsigned long nvcsw, nivcsw, flags; + int running; + struct rq *rq; + + nvcsw = p->nvcsw; + nivcsw = p->nivcsw; + for (;;) { + /* + * The runqueue is assigned before the actual context + * switch. We need to take the runqueue lock. + * + * We could check initially without the lock but it is + * very likely that we need to take the lock in every + * iteration. + */ + rq = task_rq_lock(p, &flags); + running = task_running(rq, p); + task_rq_unlock(rq, &flags); + + if (likely(!running)) + break; + /* + * The switch count is incremented before the actual + * context switch. We thus wait for two switches to be + * sure at least one completed. + */ + if ((p->nvcsw - nvcsw) > 1) + break; + if ((p->nivcsw - nivcsw) > 1) + break; + + cpu_relax(); + } +} + +/* * wait_task_inactive - wait for a thread to unschedule. * * If @match_state is nonzero, it's the @p->state value just checked and diff --git a/kernel/signal.c b/kernel/signal.c index d2dd9cf5dcc6..809a228019ad 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,7 @@ #include <linux/freezer.h> #include <linux/pid_namespace.h> #include <linux/nsproxy.h> -#include <trace/sched.h> +#include <trace/events/sched.h> #include <asm/param.h> #include <asm/uaccess.h> @@ -41,8 +41,6 @@ static struct kmem_cache *sigqueue_cachep; -DEFINE_TRACE(sched_signal_send); - static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; @@ -2283,24 +2281,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) return kill_something_info(sig, &info, pid); } -static int do_tkill(pid_t tgid, pid_t pid, int sig) +static int +do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) { - int error; - struct siginfo info; struct task_struct *p; unsigned long flags; - - error = -ESRCH; - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_TKILL; - info.si_pid = task_tgid_vnr(current); - info.si_uid = current_uid(); + int error = -ESRCH; rcu_read_lock(); p = find_task_by_vpid(pid); if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { - error = check_kill_permission(sig, &info, p); + error = check_kill_permission(sig, info, p); /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. @@ -2310,7 +2301,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) * signal is private anyway. */ if (!error && sig && lock_task_sighand(p, &flags)) { - error = specific_send_sig_info(sig, &info, p); + error = specific_send_sig_info(sig, info, p); unlock_task_sighand(p, &flags); } } @@ -2319,6 +2310,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) return error; } +static int do_tkill(pid_t tgid, pid_t pid, int sig) +{ + struct siginfo info; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; + info.si_pid = task_tgid_vnr(current); + info.si_uid = current_uid(); + + return do_send_specific(tgid, pid, sig, &info); +} + /** * sys_tgkill - send signal to one specific thread * @tgid: the thread group ID of the thread @@ -2368,6 +2372,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, return kill_proc_info(sig, &info, pid); } +long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) +{ + /* This is only valid for single tasks */ + if (pid <= 0 || tgid <= 0) + return -EINVAL; + + /* Not even root can pretend to send signals from the kernel. + Nor can they impersonate a kill(), which adds source info. */ + if (info->si_code >= 0) + return -EPERM; + info->si_signo = sig; + + return do_send_specific(tgid, pid, sig, info); +} + +SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) +{ + siginfo_t info; + + if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) + return -EFAULT; + + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *t = current; diff --git a/kernel/softirq.c b/kernel/softirq.c index f674f332a024..258885a543db 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -24,7 +24,9 @@ #include <linux/ftrace.h> #include <linux/smp.h> #include <linux/tick.h> -#include <trace/irq.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/irq.h> #include <asm/irq.h> /* @@ -186,9 +188,6 @@ EXPORT_SYMBOL(local_bh_enable_ip); */ #define MAX_SOFTIRQ_RESTART 10 -DEFINE_TRACE(softirq_entry); -DEFINE_TRACE(softirq_exit); - asmlinkage void __do_softirq(void) { struct softirq_action *h; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 417d1985e299..4a13e5a01ce3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -48,6 +48,21 @@ config FTRACE_NMI_ENTER depends on HAVE_FTRACE_NMI_ENTER default y +config EVENT_TRACING + select CONTEXT_SWITCH_TRACER + bool + +config CONTEXT_SWITCH_TRACER + select MARKERS + bool + +# All tracer options should select GENERIC_TRACER. For those options that are +# enabled by all tracers (context switch and event tracer) they select TRACING. +# This allows those options to appear when no other tracer is selected. But the +# options do not appear when something else selects it. We need the two options +# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the +# hidding of the automatic options options. + config TRACING bool select DEBUG_FS @@ -56,6 +71,11 @@ config TRACING select TRACEPOINTS select NOP_TRACER select BINARY_PRINTF + select EVENT_TRACING + +config GENERIC_TRACER + bool + select TRACING # # Minimum requirements an architecture has to meet for us to @@ -73,14 +93,20 @@ config TRACING_SUPPORT if TRACING_SUPPORT -menu "Tracers" +menuconfig FTRACE + bool "Tracers" + default y if DEBUG_KERNEL + help + Enable the kernel tracing infrastructure. + +if FTRACE config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER select FRAME_POINTER select KALLSYMS - select TRACING + select GENERIC_TRACER select CONTEXT_SWITCH_TRACER help Enable the kernel to trace every kernel function. This is done @@ -104,13 +130,14 @@ config FUNCTION_GRAPH_TRACER the return value. This is done by setting the current return address on the current task structure into a stack of calls. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n depends on TRACE_IRQFLAGS_SUPPORT depends on GENERIC_TIME select TRACE_IRQFLAGS - select TRACING + select GENERIC_TRACER select TRACER_MAX_TRACE help This option measures the time spent in irqs-off critical @@ -131,7 +158,7 @@ config PREEMPT_TRACER default n depends on GENERIC_TIME depends on PREEMPT - select TRACING + select GENERIC_TRACER select TRACER_MAX_TRACE help This option measures the time spent in preemption off critical @@ -150,7 +177,7 @@ config PREEMPT_TRACER config SYSPROF_TRACER bool "Sysprof Tracer" depends on X86 - select TRACING + select GENERIC_TRACER select CONTEXT_SWITCH_TRACER help This tracer provides the trace needed by the 'Sysprof' userspace @@ -158,40 +185,33 @@ config SYSPROF_TRACER config SCHED_TRACER bool "Scheduling Latency Tracer" - select TRACING + select GENERIC_TRACER select CONTEXT_SWITCH_TRACER select TRACER_MAX_TRACE help This tracer tracks the latency of the highest priority task to be scheduled in, starting from the point it has woken up. -config CONTEXT_SWITCH_TRACER - bool "Trace process context switches" - select TRACING - select MARKERS - help - This tracer gets called from the context switch and records - all switching of tasks. - -config EVENT_TRACER - bool "Trace various events in the kernel" +config ENABLE_DEFAULT_TRACERS + bool "Trace process context switches and events" + depends on !GENERIC_TRACER select TRACING help This tracer hooks to various trace points in the kernel allowing the user to pick and choose which trace point they - want to trace. + want to trace. It also includes the sched_switch tracer plugin. config FTRACE_SYSCALLS bool "Trace syscalls" depends on HAVE_FTRACE_SYSCALLS - select TRACING + select GENERIC_TRACER select KALLSYMS help Basic tracer to catch the syscall entry and exit events. config BOOT_TRACER bool "Trace boot initcalls" - select TRACING + select GENERIC_TRACER select CONTEXT_SWITCH_TRACER help This tracer helps developers to optimize boot times: it records @@ -207,8 +227,36 @@ config BOOT_TRACER to enable this on bootup. config TRACE_BRANCH_PROFILING + bool + select GENERIC_TRACER + +choice + prompt "Branch Profiling" + default BRANCH_PROFILE_NONE + help + The branch profiling is a software profiler. It will add hooks + into the C conditionals to test which path a branch takes. + + The likely/unlikely profiler only looks at the conditions that + are annotated with a likely or unlikely macro. + + The "all branch" profiler will profile every if statement in the + kernel. This profiler will also enable the likely/unlikely + profiler as well. + + Either of the above profilers add a bit of overhead to the system. + If unsure choose "No branch profiling". + +config BRANCH_PROFILE_NONE + bool "No branch profiling" + help + No branch profiling. Branch profiling adds a bit of overhead. + Only enable it if you want to analyse the branching behavior. + Otherwise keep it disabled. + +config PROFILE_ANNOTATED_BRANCHES bool "Trace likely/unlikely profiler" - select TRACING + select TRACE_BRANCH_PROFILING help This tracer profiles all the the likely and unlikely macros in the kernel. It will display the results in: @@ -218,11 +266,9 @@ config TRACE_BRANCH_PROFILING Note: this will add a significant overhead, only turn this on if you need to profile the system's use of these macros. - Say N if unsure. - config PROFILE_ALL_BRANCHES bool "Profile all if conditionals" - depends on TRACE_BRANCH_PROFILING + select TRACE_BRANCH_PROFILING help This tracer profiles all branch conditions. Every if () taken in the kernel is recorded whether it hit or miss. @@ -230,11 +276,12 @@ config PROFILE_ALL_BRANCHES /debugfs/tracing/profile_branch + This option also enables the likely/unlikely profiler. + This configuration, when enabled, will impose a great overhead on the system. This should only be enabled when the system is to be analyzed - - Say N if unsure. +endchoice config TRACING_BRANCHES bool @@ -261,7 +308,7 @@ config BRANCH_TRACER config POWER_TRACER bool "Trace power consumption behavior" depends on X86 - select TRACING + select GENERIC_TRACER help This tracer helps developers to analyze and optimize the kernels power management decisions, specifically the C-state and P-state @@ -295,14 +342,14 @@ config STACK_TRACER config HW_BRANCH_TRACER depends on HAVE_HW_BRANCH_TRACER bool "Trace hw branches" - select TRACING + select GENERIC_TRACER help This tracer records all branches on the system in a circular buffer giving access to the last N branches for each cpu. config KMEMTRACE bool "Trace SLAB allocations" - select TRACING + select GENERIC_TRACER help kmemtrace provides tracing for slab allocator functions, such as kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected @@ -322,7 +369,7 @@ config KMEMTRACE config WORKQUEUE_TRACER bool "Trace workqueues" - select TRACING + select GENERIC_TRACER help The workqueue tracer provides some statistical informations about each cpu workqueue thread such as the number of the @@ -338,7 +385,7 @@ config BLK_DEV_IO_TRACE select RELAY select DEBUG_FS select TRACEPOINTS - select TRACING + select GENERIC_TRACER select STACKTRACE help Say Y here if you want to be able to trace the block layer actions @@ -375,6 +422,20 @@ config DYNAMIC_FTRACE were made. If so, it runs stop_machine (stops all CPUS) and modifies the code to jump over the call to ftrace. +config FUNCTION_PROFILER + bool "Kernel function profiler" + depends on FUNCTION_TRACER + default n + help + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A file in the trace_stats + directory called functions, that show the list of functions that + have been hit and their counters. + + If in doubt, say N + config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE @@ -385,7 +446,7 @@ config FTRACE_SELFTEST config FTRACE_STARTUP_TEST bool "Perform a startup test on ftrace" - depends on TRACING + depends on GENERIC_TRACER select FTRACE_SELFTEST help This option performs a series of startup tests on ftrace. On bootup @@ -396,7 +457,7 @@ config FTRACE_STARTUP_TEST config MMIOTRACE bool "Memory mapped IO tracing" depends on HAVE_MMIOTRACE_SUPPORT && PCI - select TRACING + select GENERIC_TRACER help Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap @@ -416,7 +477,23 @@ config MMIOTRACE_TEST Say N, unless you absolutely know what you are doing. -endmenu +config RING_BUFFER_BENCHMARK + tristate "Ring buffer benchmark stress tester" + depends on RING_BUFFER + help + This option creates a test to stress the ring buffer and bench mark it. + It creates its own ring buffer such that it will not interfer with + any other users of the ring buffer (such as ftrace). It then creates + a producer and consumer that will run for 10 seconds and sleep for + 10 seconds. Each interval it will print out the number of events + it recorded and give a rough estimate of how long each iteration took. + + It does not disable interrupts or raise its priority, so it may be + affected by processes that are running. + + If unsure, say N + +endif # FTRACE endif # TRACING_SUPPORT diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 2630f5121ec1..844164dca90a 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -15,11 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING endif +# +# Make the trace clocks available generally: it's infrastructure +# relied on by ptrace for example: +# +obj-y += trace_clock.o + obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o +obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o obj-$(CONFIG_TRACING) += trace.o -obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o @@ -39,12 +45,14 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o obj-$(CONFIG_POWER_TRACER) += trace_power.o obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o -obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o -obj-$(CONFIG_EVENT_TRACER) += trace_events.o -obj-$(CONFIG_EVENT_TRACER) += events.o -obj-$(CONFIG_EVENT_TRACER) += trace_export.o +obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o +ifeq ($(CONFIG_BLOCK),y) +obj-$(CONFIG_EVENT_TRACING) += blktrace.o +endif +obj-$(CONFIG_EVENT_TRACING) += trace_events.o +obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o -obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o +obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o libftrace-y := ftrace.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 921ef5d1f0ba..7bd6a9893c24 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -23,10 +23,14 @@ #include <linux/mutex.h> #include <linux/debugfs.h> #include <linux/time.h> -#include <trace/block.h> #include <linux/uaccess.h> + +#include <trace/events/block.h> + #include "trace_output.h" +#ifdef CONFIG_BLK_DEV_IO_TRACE + static unsigned int blktrace_seq __read_mostly = 1; static struct trace_array *blk_tr; @@ -147,7 +151,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, { if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) return 1; - if (sector < bt->start_lba || sector > bt->end_lba) + if (sector && (sector < bt->start_lba || sector > bt->end_lba)) return 1; if (bt->pid && pid != bt->pid) return 1; @@ -192,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, DISCARD); pid = tsk->pid; - if (unlikely(act_log_check(bt, what, sector, pid))) + if (act_log_check(bt, what, sector, pid)) return; cpu = raw_smp_processor_id(); @@ -262,6 +266,7 @@ static void blk_trace_free(struct blk_trace *bt) { debugfs_remove(bt->msg_file); debugfs_remove(bt->dropped_file); + debugfs_remove(bt->dir); relay_close(bt->rchan); free_percpu(bt->sequence); free_percpu(bt->msg_data); @@ -403,11 +408,29 @@ static struct rchan_callbacks blk_relay_callbacks = { .remove_buf_file = blk_remove_buf_file_callback, }; +static void blk_trace_setup_lba(struct blk_trace *bt, + struct block_device *bdev) +{ + struct hd_struct *part = NULL; + + if (bdev) + part = bdev->bd_part; + + if (part) { + bt->start_lba = part->start_sect; + bt->end_lba = part->start_sect + part->nr_sects; + } else { + bt->start_lba = 0; + bt->end_lba = -1ULL; + } +} + /* * Setup everything required to start tracing */ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct blk_user_trace_setup *buts) + struct block_device *bdev, + struct blk_user_trace_setup *buts) { struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; @@ -480,10 +503,13 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->act_mask) bt->act_mask = (u16) -1; - bt->start_lba = buts->start_lba; - bt->end_lba = buts->end_lba; - if (!bt->end_lba) - bt->end_lba = -1ULL; + blk_trace_setup_lba(bt, bdev); + + /* overwrite with user settings */ + if (buts->start_lba) + bt->start_lba = buts->start_lba; + if (buts->end_lba) + bt->end_lba = buts->end_lba; bt->pid = buts->pid; bt->trace_state = Blktrace_setup; @@ -505,6 +531,7 @@ err: } int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; @@ -514,7 +541,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (ret) return -EFAULT; - ret = do_blk_trace_setup(q, name, dev, &buts); + ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; @@ -582,7 +609,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: bdevname(bdev, b); - ret = blk_trace_setup(q, b, bdev->bd_dev, arg); + ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; case BLKTRACESTART: start = 1; @@ -809,7 +836,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * @bio: the source bio * @dev: target device * @from: source sector - * @to: target sector * * Description: * Device mapper or raid target sometimes need to split a bio because @@ -817,7 +843,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * **/ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from, sector_t to) + dev_t dev, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -825,12 +851,13 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, if (likely(!bt)) return; - r.device = cpu_to_be32(dev); - r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); - r.sector = cpu_to_be64(to); + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector_from = cpu_to_be64(from); - __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, - !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, + BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), + sizeof(r), &r); } /** @@ -971,6 +998,16 @@ static inline const void *pdu_start(const struct trace_entry *ent) return te_blk_io_trace(ent) + 1; } +static inline u32 t_action(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->action; +} + +static inline u32 t_bytes(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->bytes; +} + static inline u32 t_sec(const struct trace_entry *ent) { return te_blk_io_trace(ent)->bytes >> 9; @@ -996,11 +1033,11 @@ static void get_pdu_remap(const struct trace_entry *ent, struct blk_io_trace_remap *r) { const struct blk_io_trace_remap *__r = pdu_start(ent); - __u64 sector = __r->sector; + __u64 sector_from = __r->sector_from; - r->device = be32_to_cpu(__r->device); r->device_from = be32_to_cpu(__r->device_from); - r->sector = be64_to_cpu(sector); + r->device_to = be32_to_cpu(__r->device_to); + r->sector_from = be64_to_cpu(sector_from); } typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); @@ -1031,36 +1068,98 @@ static int blk_log_action(struct trace_iterator *iter, const char *act) MAJOR(t->device), MINOR(t->device), act, rwbs); } +static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) +{ + const unsigned char *pdu_buf; + int pdu_len; + int i, end, ret; + + pdu_buf = pdu_start(ent); + pdu_len = te_blk_io_trace(ent)->pdu_len; + + if (!pdu_len) + return 1; + + /* find the last zero that needs to be printed */ + for (end = pdu_len - 1; end >= 0; end--) + if (pdu_buf[end]) + break; + end++; + + if (!trace_seq_putc(s, '(')) + return 0; + + for (i = 0; i < pdu_len; i++) { + + ret = trace_seq_printf(s, "%s%02x", + i == 0 ? "" : " ", pdu_buf[i]); + if (!ret) + return ret; + + /* + * stop when the rest is just zeroes and indicate so + * with a ".." appended + */ + if (i == end && end != pdu_len - 1) + return trace_seq_puts(s, " ..) "); + } + + return trace_seq_puts(s, ") "); +} + static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%s]\n", - t_sector(ent), t_sec(ent), cmd); - return trace_seq_printf(s, "[%s]\n", cmd); + if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { + int ret; + + ret = trace_seq_printf(s, "%u ", t_bytes(ent)); + if (!ret) + return 0; + ret = blk_log_dump_pdu(s, ent); + if (!ret) + return 0; + return trace_seq_printf(s, "[%s]\n", cmd); + } else { + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%s]\n", + t_sector(ent), t_sec(ent), cmd); + return trace_seq_printf(s, "[%s]\n", cmd); + } } static int blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent) { - if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), - t_sec(ent), t_error(ent)); - return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); + if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { + int ret; + + ret = blk_log_dump_pdu(s, ent); + if (ret) + return trace_seq_printf(s, "[%d]\n", t_error(ent)); + return 0; + } else { + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%d]\n", + t_sector(ent), + t_sec(ent), t_error(ent)); + return trace_seq_printf(s, "%llu [%d]\n", + t_sector(ent), t_error(ent)); + } } static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) { - struct blk_io_trace_remap r = { .device = 0, }; + struct blk_io_trace_remap r = { .device_from = 0, }; get_pdu_remap(ent, &r); return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", - t_sector(ent), - t_sec(ent), MAJOR(r.device), MINOR(r.device), - (unsigned long long)r.sector); + t_sector(ent), t_sec(ent), + MAJOR(r.device_from), MINOR(r.device_from), + (unsigned long long)r.sector_from); } static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) @@ -1117,7 +1216,6 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { blk_tracer_enabled = true; - trace_flags &= ~TRACE_ITER_CONTEXT_INFO; } static int blk_tracer_init(struct trace_array *tr) @@ -1130,7 +1228,6 @@ static int blk_tracer_init(struct trace_array *tr) static void blk_tracer_stop(struct trace_array *tr) { blk_tracer_enabled = false; - trace_flags |= TRACE_ITER_CONTEXT_INFO; } static void blk_tracer_reset(struct trace_array *tr) @@ -1182,7 +1279,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, } if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) - ret = trace_seq_printf(s, "Bad pc action %x\n", what); + ret = trace_seq_printf(s, "Unknown action %x\n", what); else { ret = log_action(iter, what2act[what].act[long_act]); if (ret) @@ -1195,9 +1292,6 @@ out: static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, int flags) { - if (!trace_print_context(iter)) - return TRACE_TYPE_PARTIAL_LINE; - return print_one_line(iter, false); } @@ -1232,6 +1326,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) return print_one_line(iter, true); } +static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) +{ + /* don't output context-info for blk_classic output */ + if (bit == TRACE_BLK_OPT_CLASSIC) { + if (set) + trace_flags &= ~TRACE_ITER_CONTEXT_INFO; + else + trace_flags |= TRACE_ITER_CONTEXT_INFO; + } + return 0; +} + static struct tracer blk_tracer __read_mostly = { .name = "blk", .init = blk_tracer_init, @@ -1241,6 +1347,7 @@ static struct tracer blk_tracer __read_mostly = { .print_header = blk_tracer_print_header, .print_line = blk_tracer_print_line, .flags = &blk_tracer_flags, + .set_flag = blk_tracer_set_flag, }; static struct trace_event trace_blk_event = { @@ -1285,7 +1392,8 @@ static int blk_trace_remove_queue(struct request_queue *q) /* * Setup everything required to start tracing */ -static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) +static int blk_trace_setup_queue(struct request_queue *q, + struct block_device *bdev) { struct blk_trace *old_bt, *bt = NULL; int ret = -ENOMEM; @@ -1298,9 +1406,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) if (!bt->msg_data) goto free_bt; - bt->dev = dev; + bt->dev = bdev->bd_dev; bt->act_mask = (u16)-1; - bt->end_lba = -1ULL; + + blk_trace_setup_lba(bt, bdev); old_bt = xchg(&q->blk_trace, bt); if (old_bt != NULL) { @@ -1517,7 +1626,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (attr == &dev_attr_enable) { if (value) - ret = blk_trace_setup_queue(q, bdev->bd_dev); + ret = blk_trace_setup_queue(q, bdev); else ret = blk_trace_remove_queue(q); goto out_unlock_bdev; @@ -1525,7 +1634,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, ret = 0; if (q->blk_trace == NULL) - ret = blk_trace_setup_queue(q, bdev->bd_dev); + ret = blk_trace_setup_queue(q, bdev); if (ret == 0) { if (attr == &dev_attr_act_mask) @@ -1548,3 +1657,80 @@ out: return ret ? ret : count; } +int blk_trace_init_sysfs(struct device *dev) +{ + return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); +} + +#endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#ifdef CONFIG_EVENT_TRACING + +void blk_dump_cmd(char *buf, struct request *rq) +{ + int i, end; + int len = rq->cmd_len; + unsigned char *cmd = rq->cmd; + + if (!blk_pc_request(rq)) { + buf[0] = '\0'; + return; + } + + for (end = len - 1; end >= 0; end--) + if (cmd[end]) + break; + end++; + + for (i = 0; i < len; i++) { + buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); + if (i == end && end != len - 1) { + sprintf(buf, " .."); + break; + } + } +} + +void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) +{ + int i = 0; + + if (rw & WRITE) + rwbs[i++] = 'W'; + else if (rw & 1 << BIO_RW_DISCARD) + rwbs[i++] = 'D'; + else if (bytes) + rwbs[i++] = 'R'; + else + rwbs[i++] = 'N'; + + if (rw & 1 << BIO_RW_AHEAD) + rwbs[i++] = 'A'; + if (rw & 1 << BIO_RW_BARRIER) + rwbs[i++] = 'B'; + if (rw & 1 << BIO_RW_SYNCIO) + rwbs[i++] = 'S'; + if (rw & 1 << BIO_RW_META) + rwbs[i++] = 'M'; + + rwbs[i] = '\0'; +} + +void blk_fill_rwbs_rq(char *rwbs, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + int bytes; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) + bytes = rq->data_len; + else + bytes = rq->hard_nr_sectors << 9; + + blk_fill_rwbs(rwbs, rw, bytes); +} + +#endif /* CONFIG_EVENT_TRACING */ + diff --git a/kernel/trace/events.c b/kernel/trace/events.c deleted file mode 100644 index 246f2aa6dc46..000000000000 --- a/kernel/trace/events.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This is the place to register all trace points as events. - */ - -#include <linux/stringify.h> - -#include <trace/trace_events.h> - -#include "trace_output.h" - -#include "trace_events_stage_1.h" -#include "trace_events_stage_2.h" -#include "trace_events_stage_3.h" - diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f1ed080406c3..bb60732ade0c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,11 +29,13 @@ #include <linux/list.h> #include <linux/hash.h> -#include <trace/sched.h> +#include <trace/events/sched.h> #include <asm/ftrace.h> +#include <asm/setup.h> -#include "trace.h" +#include "trace_output.h" +#include "trace_stat.h" #define FTRACE_WARN_ON(cond) \ do { \ @@ -68,7 +70,7 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { - .func = ftrace_stub, + .func = ftrace_stub, }; static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; @@ -240,6 +242,580 @@ static void ftrace_update_pid_func(void) #endif } +#ifdef CONFIG_FUNCTION_PROFILER +struct ftrace_profile { + struct hlist_node node; + unsigned long ip; + unsigned long counter; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + unsigned long long time; +#endif +}; + +struct ftrace_profile_page { + struct ftrace_profile_page *next; + unsigned long index; + struct ftrace_profile records[]; +}; + +struct ftrace_profile_stat { + atomic_t disabled; + struct hlist_head *hash; + struct ftrace_profile_page *pages; + struct ftrace_profile_page *start; + struct tracer_stat stat; +}; + +#define PROFILE_RECORDS_SIZE \ + (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) + +#define PROFILES_PER_PAGE \ + (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) + +static int ftrace_profile_bits __read_mostly; +static int ftrace_profile_enabled __read_mostly; + +/* ftrace_profile_lock - synchronize the enable and disable of the profiler */ +static DEFINE_MUTEX(ftrace_profile_lock); + +static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); + +#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ + +static void * +function_stat_next(void *v, int idx) +{ + struct ftrace_profile *rec = v; + struct ftrace_profile_page *pg; + + pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); + + again: + rec++; + if ((void *)rec >= (void *)&pg->records[pg->index]) { + pg = pg->next; + if (!pg) + return NULL; + rec = &pg->records[0]; + if (!rec->counter) + goto again; + } + + return rec; +} + +static void *function_stat_start(struct tracer_stat *trace) +{ + struct ftrace_profile_stat *stat = + container_of(trace, struct ftrace_profile_stat, stat); + + if (!stat || !stat->start) + return NULL; + + return function_stat_next(&stat->start->records[0], 0); +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* function graph compares on total time */ +static int function_stat_cmp(void *p1, void *p2) +{ + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; + + if (a->time < b->time) + return -1; + if (a->time > b->time) + return 1; + else + return 0; +} +#else +/* not function graph compares against hits */ +static int function_stat_cmp(void *p1, void *p2) +{ + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; + + if (a->counter < b->counter) + return -1; + if (a->counter > b->counter) + return 1; + else + return 0; +} +#endif + +static int function_stat_headers(struct seq_file *m) +{ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + seq_printf(m, " Function " + "Hit Time Avg\n" + " -------- " + "--- ---- ---\n"); +#else + seq_printf(m, " Function Hit\n" + " -------- ---\n"); +#endif + return 0; +} + +static int function_stat_show(struct seq_file *m, void *v) +{ + struct ftrace_profile *rec = v; + char str[KSYM_SYMBOL_LEN]; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + static DEFINE_MUTEX(mutex); + static struct trace_seq s; + unsigned long long avg; +#endif + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + seq_printf(m, " %-30.30s %10lu", str, rec->counter); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + seq_printf(m, " "); + avg = rec->time; + do_div(avg, rec->counter); + + mutex_lock(&mutex); + trace_seq_init(&s); + trace_print_graph_duration(rec->time, &s); + trace_seq_puts(&s, " "); + trace_print_graph_duration(avg, &s); + trace_print_seq(m, &s); + mutex_unlock(&mutex); +#endif + seq_putc(m, '\n'); + + return 0; +} + +static void ftrace_profile_reset(struct ftrace_profile_stat *stat) +{ + struct ftrace_profile_page *pg; + + pg = stat->pages = stat->start; + + while (pg) { + memset(pg->records, 0, PROFILE_RECORDS_SIZE); + pg->index = 0; + pg = pg->next; + } + + memset(stat->hash, 0, + FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); +} + +int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) +{ + struct ftrace_profile_page *pg; + int functions; + int pages; + int i; + + /* If we already allocated, do nothing */ + if (stat->pages) + return 0; + + stat->pages = (void *)get_zeroed_page(GFP_KERNEL); + if (!stat->pages) + return -ENOMEM; + +#ifdef CONFIG_DYNAMIC_FTRACE + functions = ftrace_update_tot_cnt; +#else + /* + * We do not know the number of functions that exist because + * dynamic tracing is what counts them. With past experience + * we have around 20K functions. That should be more than enough. + * It is highly unlikely we will execute every function in + * the kernel. + */ + functions = 20000; +#endif + + pg = stat->start = stat->pages; + + pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); + + for (i = 0; i < pages; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + if (!pg->next) + goto out_free; + pg = pg->next; + } + + return 0; + + out_free: + pg = stat->start; + while (pg) { + unsigned long tmp = (unsigned long)pg; + + pg = pg->next; + free_page(tmp); + } + + free_page((unsigned long)stat->pages); + stat->pages = NULL; + stat->start = NULL; + + return -ENOMEM; +} + +static int ftrace_profile_init_cpu(int cpu) +{ + struct ftrace_profile_stat *stat; + int size; + + stat = &per_cpu(ftrace_profile_stats, cpu); + + if (stat->hash) { + /* If the profile is already created, simply reset it */ + ftrace_profile_reset(stat); + return 0; + } + + /* + * We are profiling all functions, but usually only a few thousand + * functions are hit. We'll make a hash of 1024 items. + */ + size = FTRACE_PROFILE_HASH_SIZE; + + stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + + if (!stat->hash) + return -ENOMEM; + + if (!ftrace_profile_bits) { + size--; + + for (; size; size >>= 1) + ftrace_profile_bits++; + } + + /* Preallocate the function profiling pages */ + if (ftrace_profile_pages_init(stat) < 0) { + kfree(stat->hash); + stat->hash = NULL; + return -ENOMEM; + } + + return 0; +} + +static int ftrace_profile_init(void) +{ + int cpu; + int ret = 0; + + for_each_online_cpu(cpu) { + ret = ftrace_profile_init_cpu(cpu); + if (ret) + break; + } + + return ret; +} + +/* interrupts must be disabled */ +static struct ftrace_profile * +ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) +{ + struct ftrace_profile *rec; + struct hlist_head *hhd; + struct hlist_node *n; + unsigned long key; + + key = hash_long(ip, ftrace_profile_bits); + hhd = &stat->hash[key]; + + if (hlist_empty(hhd)) + return NULL; + + hlist_for_each_entry_rcu(rec, n, hhd, node) { + if (rec->ip == ip) + return rec; + } + + return NULL; +} + +static void ftrace_add_profile(struct ftrace_profile_stat *stat, + struct ftrace_profile *rec) +{ + unsigned long key; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &stat->hash[key]); +} + +/* + * The memory is already allocated, this simply finds a new record to use. + */ +static struct ftrace_profile * +ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) +{ + struct ftrace_profile *rec = NULL; + + /* prevent recursion (from NMIs) */ + if (atomic_inc_return(&stat->disabled) != 1) + goto out; + + /* + * Try to find the function again since an NMI + * could have added it + */ + rec = ftrace_find_profiled_func(stat, ip); + if (rec) + goto out; + + if (stat->pages->index == PROFILES_PER_PAGE) { + if (!stat->pages->next) + goto out; + stat->pages = stat->pages->next; + } + + rec = &stat->pages->records[stat->pages->index++]; + rec->ip = ip; + ftrace_add_profile(stat, rec); + + out: + atomic_dec(&stat->disabled); + + return rec; +} + +static void +function_profile_call(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_profile_stat *stat; + struct ftrace_profile *rec; + unsigned long flags; + + if (!ftrace_profile_enabled) + return; + + local_irq_save(flags); + + stat = &__get_cpu_var(ftrace_profile_stats); + if (!stat->hash || !ftrace_profile_enabled) + goto out; + + rec = ftrace_find_profiled_func(stat, ip); + if (!rec) { + rec = ftrace_profile_alloc(stat, ip); + if (!rec) + goto out; + } + + rec->counter++; + out: + local_irq_restore(flags); +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int profile_graph_entry(struct ftrace_graph_ent *trace) +{ + function_profile_call(trace->func, 0); + return 1; +} + +static void profile_graph_return(struct ftrace_graph_ret *trace) +{ + struct ftrace_profile_stat *stat; + unsigned long long calltime; + struct ftrace_profile *rec; + unsigned long flags; + + local_irq_save(flags); + stat = &__get_cpu_var(ftrace_profile_stats); + if (!stat->hash || !ftrace_profile_enabled) + goto out; + + calltime = trace->rettime - trace->calltime; + + if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { + int index; + + index = trace->depth; + + /* Append this call time to the parent time to subtract */ + if (index) + current->ret_stack[index - 1].subtime += calltime; + + if (current->ret_stack[index].subtime < calltime) + calltime -= current->ret_stack[index].subtime; + else + calltime = 0; + } + + rec = ftrace_find_profiled_func(stat, trace->func); + if (rec) + rec->time += calltime; + + out: + local_irq_restore(flags); +} + +static int register_ftrace_profiler(void) +{ + return register_ftrace_graph(&profile_graph_return, + &profile_graph_entry); +} + +static void unregister_ftrace_profiler(void) +{ + unregister_ftrace_graph(); +} +#else +static struct ftrace_ops ftrace_profile_ops __read_mostly = +{ + .func = function_profile_call, +}; + +static int register_ftrace_profiler(void) +{ + return register_ftrace_function(&ftrace_profile_ops); +} + +static void unregister_ftrace_profiler(void) +{ + unregister_ftrace_function(&ftrace_profile_ops); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +static ssize_t +ftrace_profile_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + char buf[64]; /* big enough to hold a number */ + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + val = !!val; + + mutex_lock(&ftrace_profile_lock); + if (ftrace_profile_enabled ^ val) { + if (val) { + ret = ftrace_profile_init(); + if (ret < 0) { + cnt = ret; + goto out; + } + + ret = register_ftrace_profiler(); + if (ret < 0) { + cnt = ret; + goto out; + } + ftrace_profile_enabled = 1; + } else { + ftrace_profile_enabled = 0; + /* + * unregister_ftrace_profiler calls stop_machine + * so this acts like an synchronize_sched. + */ + unregister_ftrace_profiler(); + } + } + out: + mutex_unlock(&ftrace_profile_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; /* big enough to hold a number */ + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static const struct file_operations ftrace_profile_fops = { + .open = tracing_open_generic, + .read = ftrace_profile_read, + .write = ftrace_profile_write, +}; + +/* used to initialize the real stat files */ +static struct tracer_stat function_stats __initdata = { + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show +}; + +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ + struct ftrace_profile_stat *stat; + struct dentry *entry; + char *name; + int ret; + int cpu; + + for_each_possible_cpu(cpu) { + stat = &per_cpu(ftrace_profile_stats, cpu); + + /* allocate enough for function name + cpu number */ + name = kmalloc(32, GFP_KERNEL); + if (!name) { + /* + * The files created are permanent, if something happens + * we still do not free memory. + */ + kfree(stat); + WARN(1, + "Could not allocate stat file for cpu %d\n", + cpu); + return; + } + stat->stat = function_stats; + snprintf(name, 32, "function%d", cpu); + stat->stat.name = name; + ret = register_stat_tracer(&stat->stat); + if (ret) { + WARN(1, + "Could not register function stat for cpu %d\n", + cpu); + kfree(name); + return; + } + } + + entry = debugfs_create_file("function_profile_enabled", 0644, + d_tracer, NULL, &ftrace_profile_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'function_profile_enabled' entry\n"); +} + +#else /* CONFIG_FUNCTION_PROFILER */ +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ +} +#endif /* CONFIG_FUNCTION_PROFILER */ + /* set when tracing only a pid */ struct pid *ftrace_pid_trace; static struct pid * const ftrace_swapper_pid = &init_struct_pid; @@ -261,7 +837,6 @@ struct ftrace_func_probe { struct rcu_head rcu; }; - enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -346,30 +921,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec) rec->flags |= FTRACE_FL_FREE; } -void ftrace_release(void *start, unsigned long size) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; - unsigned long s = (unsigned long)start; - unsigned long e = s + size; - - if (ftrace_disabled || !start) - return; - - mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e)) { - /* - * rec->ip is changed in ftrace_free_rec() - * It should not between s and e if record was freed. - */ - FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); - ftrace_free_rec(rec); - } - } while_for_each_ftrace_rec(); - mutex_unlock(&ftrace_lock); -} - static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { struct dyn_ftrace *rec; @@ -1408,7 +1959,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_probe_ops __read_mostly = { - .func = function_trace_probe_call, + .func = function_trace_probe_call, }; static int ftrace_probe_registered; @@ -1823,6 +2374,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset) ftrace_set_regex(buf, len, reset, 0); } +/* + * command line interface to allow users to set filters on boot up. + */ +#define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE +static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; +static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; + +static int __init set_ftrace_notrace(char *str) +{ + strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_notrace=", set_ftrace_notrace); + +static int __init set_ftrace_filter(char *str) +{ + strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_filter=", set_ftrace_filter); + +static void __init set_ftrace_early_filter(char *buf, int enable) +{ + char *func; + + while (buf) { + func = strsep(&buf, ","); + ftrace_set_regex(func, strlen(func), 0, enable); + } +} + +static void __init set_ftrace_early_filters(void) +{ + if (ftrace_filter_buf[0]) + set_ftrace_early_filter(ftrace_filter_buf, 1); + if (ftrace_notrace_buf[0]) + set_ftrace_early_filter(ftrace_notrace_buf, 0); +} + static int ftrace_regex_release(struct inode *inode, struct file *file, int enable) { @@ -2128,38 +2718,23 @@ static const struct file_operations ftrace_graph_fops = { static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("available_filter_functions", 0444, - d_tracer, NULL, &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_filter_functions' entry\n"); + trace_create_file("available_filter_functions", 0444, + d_tracer, NULL, &ftrace_avail_fops); - entry = debugfs_create_file("failures", 0444, - d_tracer, NULL, &ftrace_failures_fops); - if (!entry) - pr_warning("Could not create debugfs 'failures' entry\n"); + trace_create_file("failures", 0444, + d_tracer, NULL, &ftrace_failures_fops); - entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, - NULL, &ftrace_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_filter' entry\n"); + trace_create_file("set_ftrace_filter", 0644, d_tracer, + NULL, &ftrace_filter_fops); - entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, + trace_create_file("set_ftrace_notrace", 0644, d_tracer, NULL, &ftrace_notrace_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_notrace' entry\n"); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - entry = debugfs_create_file("set_graph_function", 0444, d_tracer, + trace_create_file("set_graph_function", 0444, d_tracer, NULL, &ftrace_graph_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ return 0; @@ -2197,14 +2772,72 @@ static int ftrace_convert_nops(struct module *mod, return 0; } -void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) +#ifdef CONFIG_MODULES +void ftrace_release(void *start, void *end) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + unsigned long s = (unsigned long)start; + unsigned long e = (unsigned long)end; + + if (ftrace_disabled || !start || start == end) + return; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { + if ((rec->ip >= s) && (rec->ip < e)) { + /* + * rec->ip is changed in ftrace_free_rec() + * It should not between s and e if record was freed. + */ + FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); + ftrace_free_rec(rec); + } + } while_for_each_ftrace_rec(); + mutex_unlock(&ftrace_lock); +} + +static void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; ftrace_convert_nops(mod, start, end); } +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + case MODULE_STATE_GOING: + ftrace_release(mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + } + + return 0; +} +#else +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +struct notifier_block ftrace_module_nb = { + .notifier_call = ftrace_module_notify, + .priority = 0, +}; + extern unsigned long __start_mcount_loc[]; extern unsigned long __stop_mcount_loc[]; @@ -2236,6 +2869,12 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); + ret = register_module_notifier(&ftrace_module_nb); + if (ret) + pr_warning("Failed to register trace ftrace module notifier\n"); + + set_ftrace_early_filters(); + return; failed: ftrace_disabled = 1; @@ -2417,7 +3056,6 @@ static const struct file_operations ftrace_pid_fops = { static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -2425,11 +3063,11 @@ static __init int ftrace_init_debugfs(void) ftrace_init_dyn_debugfs(d_tracer); - entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, - NULL, &ftrace_pid_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_pid' entry\n"); + trace_create_file("set_ftrace_pid", 0644, d_tracer, + NULL, &ftrace_pid_fops); + + ftrace_profile_debugfs(d_tracer); + return 0; } fs_initcall(ftrace_init_debugfs); @@ -2538,7 +3176,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static atomic_t ftrace_graph_active; +static int ftrace_graph_active; static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) @@ -2580,12 +3218,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } if (t->ret_stack == NULL) { - t->curr_ret_stack = -1; - /* Make sure IRQs see the -1 first: */ - barrier(); - t->ret_stack = ret_stack_list[start++]; atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); + t->curr_ret_stack = -1; + /* Make sure the tasks see the -1 first: */ + smp_wmb(); + t->ret_stack = ret_stack_list[start++]; } } while_each_thread(g, t); @@ -2643,8 +3281,10 @@ static int start_graph_tracing(void) return -ENOMEM; /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) - ftrace_graph_init_task(idle_task(cpu)); + for_each_online_cpu(cpu) { + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_task(idle_task(cpu)); + } do { ret = alloc_retstack_tasklist(ret_stack_list); @@ -2690,7 +3330,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_lock); /* we currently allow only one tracer registered at a time */ - if (atomic_read(&ftrace_graph_active)) { + if (ftrace_graph_active) { ret = -EBUSY; goto out; } @@ -2698,10 +3338,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; register_pm_notifier(&ftrace_suspend_notifier); - atomic_inc(&ftrace_graph_active); + ftrace_graph_active++; ret = start_graph_tracing(); if (ret) { - atomic_dec(&ftrace_graph_active); + ftrace_graph_active--; goto out; } @@ -2719,10 +3359,10 @@ void unregister_ftrace_graph(void) { mutex_lock(&ftrace_lock); - if (!unlikely(atomic_read(&ftrace_graph_active))) + if (unlikely(!ftrace_graph_active)) goto out; - atomic_dec(&ftrace_graph_active); + ftrace_graph_active--; unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; @@ -2736,18 +3376,25 @@ void unregister_ftrace_graph(void) /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { - if (atomic_read(&ftrace_graph_active)) { - t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + /* Make sure we do not use the parent ret_stack */ + t->ret_stack = NULL; + + if (ftrace_graph_active) { + struct ftrace_ret_stack *ret_stack; + + ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); - if (!t->ret_stack) + if (!ret_stack) return; t->curr_ret_stack = -1; atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; - } else - t->ret_stack = NULL; + /* make curr_ret_stack visable before we add the ret_stack */ + smp_wmb(); + t->ret_stack = ret_stack; + } } void ftrace_graph_exit_task(struct task_struct *t) diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 5011f4d91e37..86cdf671d7e2 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -12,7 +12,7 @@ #include <linux/dcache.h> #include <linux/fs.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> #include "trace_output.h" #include "trace.h" @@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, gfp_t gfp_flags, int node) { + struct ftrace_event_call *call = &event_kmem_alloc; struct trace_array *tr = kmemtrace_array; struct kmemtrace_alloc_entry *entry; struct ring_buffer_event *event; @@ -62,7 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } @@ -71,6 +73,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, unsigned long call_site, const void *ptr) { + struct ftrace_event_call *call = &event_kmem_free; struct trace_array *tr = kmemtrace_array; struct kmemtrace_free_entry *entry; struct ring_buffer_event *event; @@ -86,7 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, entry->call_site = call_site; entry->ptr = ptr; - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 960cbf44c844..2e642b2b7253 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -22,6 +22,28 @@ #include "trace.h" /* + * The ring buffer header is special. We must manually up keep it. + */ +int ring_buffer_print_entry_header(struct trace_seq *s) +{ + int ret; + + ret = trace_seq_printf(s, "# compressed entry header\n"); + ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); + ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); + ret = trace_seq_printf(s, "\tarray : 32 bits\n"); + ret = trace_seq_printf(s, "\n"); + ret = trace_seq_printf(s, "\tpadding : type == %d\n", + RINGBUF_TYPE_PADDING); + ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", + RINGBUF_TYPE_TIME_EXTEND); + ret = trace_seq_printf(s, "\tdata max type_len == %d\n", + RINGBUF_TYPE_DATA_TYPE_LEN_MAX); + + return ret; +} + +/* * The ring buffer is made up of a list of pages. A separate list of pages is * allocated for each CPU. A writer may only write to a buffer that is * associated with the CPU it is currently executing on. A reader may read @@ -182,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on); #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U -#define RB_MAX_SMALL_DATA 28 +#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) + +/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ +#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX enum { RB_LEN_TIME_EXTEND = 8, @@ -191,48 +216,28 @@ enum { static inline int rb_null_event(struct ring_buffer_event *event) { - return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; + return event->type_len == RINGBUF_TYPE_PADDING + && event->time_delta == 0; } static inline int rb_discarded_event(struct ring_buffer_event *event) { - return event->type == RINGBUF_TYPE_PADDING && event->time_delta; + return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; } static void rb_event_set_padding(struct ring_buffer_event *event) { - event->type = RINGBUF_TYPE_PADDING; + event->type_len = RINGBUF_TYPE_PADDING; event->time_delta = 0; } -/** - * ring_buffer_event_discard - discard an event in the ring buffer - * @buffer: the ring buffer - * @event: the event to discard - * - * Sometimes a event that is in the ring buffer needs to be ignored. - * This function lets the user discard an event in the ring buffer - * and then that event will not be read later. - * - * Note, it is up to the user to be careful with this, and protect - * against races. If the user discards an event that has been consumed - * it is possible that it could corrupt the ring buffer. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event) -{ - event->type = RINGBUF_TYPE_PADDING; - /* time delta must be non zero */ - if (!event->time_delta) - event->time_delta = 1; -} - static unsigned rb_event_data_length(struct ring_buffer_event *event) { unsigned length; - if (event->len) - length = event->len * RB_ALIGNMENT; + if (event->type_len) + length = event->type_len * RB_ALIGNMENT; else length = event->array[0]; return length + RB_EVNT_HDR_SIZE; @@ -242,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event) static unsigned rb_event_length(struct ring_buffer_event *event) { - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) /* undefined */ return -1; - return rb_event_data_length(event); + return event->array[0] + RB_EVNT_HDR_SIZE; case RINGBUF_TYPE_TIME_EXTEND: return RB_LEN_TIME_EXTEND; @@ -271,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event) unsigned ring_buffer_event_length(struct ring_buffer_event *event) { unsigned length = rb_event_length(event); - if (event->type != RINGBUF_TYPE_DATA) + if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) return length; length -= RB_EVNT_HDR_SIZE; if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) @@ -284,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); static void * rb_event_data(struct ring_buffer_event *event) { - BUG_ON(event->type != RINGBUF_TYPE_DATA); + BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ - if (event->len) + if (event->type_len) return (void *)&event->array[0]; /* Otherwise length is in array[0] and array[1] has the data */ return (void *)&event->array[1]; @@ -316,9 +321,10 @@ struct buffer_data_page { }; struct buffer_page { + struct list_head list; /* list of buffer pages */ local_t write; /* index for next write */ unsigned read; /* index for next read */ - struct list_head list; /* list of free pages */ + local_t entries; /* entries on this page */ struct buffer_data_page *page; /* Actual data page */ }; @@ -361,6 +367,34 @@ static inline int test_time_stamp(u64 delta) #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) +/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ +#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) + +/* Max number of timestamps that can fit on a page */ +#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP) + +int ring_buffer_print_page_header(struct trace_seq *s) +{ + struct buffer_data_page field; + int ret; + + ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" + "offset:0;\tsize:%u;\n", + (unsigned int)sizeof(field.time_stamp)); + + ret = trace_seq_printf(s, "\tfield: local_t commit;\t" + "offset:%u;\tsize:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + (unsigned int)sizeof(field.commit)); + + ret = trace_seq_printf(s, "\tfield: char data;\t" + "offset:%u;\tsize:%u;\n", + (unsigned int)offsetof(typeof(field), data), + (unsigned int)BUF_PAGE_SIZE); + + return ret; +} + /* * head_page == tail_page && head == tail then buffer is empty. */ @@ -375,8 +409,11 @@ struct ring_buffer_per_cpu { struct buffer_page *tail_page; /* write to tail */ struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; + unsigned long nmi_dropped; + unsigned long commit_overrun; unsigned long overrun; - unsigned long entries; + unsigned long read; + local_t entries; u64 write_stamp; u64 read_stamp; atomic_t record_disabled; @@ -389,6 +426,8 @@ struct ring_buffer { atomic_t record_disabled; cpumask_var_t cpumask; + struct lock_class_key *reader_lock_key; + struct mutex mutex; struct ring_buffer_per_cpu **buffers; @@ -420,13 +459,18 @@ struct ring_buffer_iter { /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 +static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu) +{ + /* shift to debug/test normalization and TIME_EXTENTS */ + return buffer->clock() << DEBUG_SHIFT; +} + u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) { u64 time; preempt_disable_notrace(); - /* shift to debug/test normalization and TIME_EXTENTS */ - time = buffer->clock() << DEBUG_SHIFT; + time = rb_time_stamp(buffer, cpu); preempt_enable_no_resched_notrace(); return time; @@ -523,6 +567,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; spin_lock_init(&cpu_buffer->reader_lock); + lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&cpu_buffer->pages); @@ -593,7 +638,8 @@ static int rb_cpu_notify(struct notifier_block *self, * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ -struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) +struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, + struct lock_class_key *key) { struct ring_buffer *buffer; int bsize; @@ -616,6 +662,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); buffer->flags = flags; buffer->clock = trace_clock_local; + buffer->reader_lock_key = key; /* need at least two pages */ if (buffer->pages == 1) @@ -673,7 +720,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) kfree(buffer); return NULL; } -EXPORT_SYMBOL_GPL(ring_buffer_alloc); +EXPORT_SYMBOL_GPL(__ring_buffer_alloc); /** * ring_buffer_free - free a ring buffer. @@ -947,31 +994,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) return rb_page_commit(cpu_buffer->head_page); } -/* - * When the tail hits the head and the buffer is in overwrite mode, - * the head jumps to the next page and all content on the previous - * page is discarded. But before doing so, we update the overrun - * variable of the buffer. - */ -static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) -{ - struct ring_buffer_event *event; - unsigned long head; - - for (head = 0; head < rb_head_size(cpu_buffer); - head += rb_event_length(event)) { - - event = __rb_page_index(cpu_buffer->head_page, head); - if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) - return; - /* Only count data entries */ - if (event->type != RINGBUF_TYPE_DATA) - continue; - cpu_buffer->overrun++; - cpu_buffer->entries--; - } -} - static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page **bpage) { @@ -991,7 +1013,7 @@ rb_event_index(struct ring_buffer_event *event) return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); } -static int +static inline int rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { @@ -1110,28 +1132,21 @@ static void rb_update_event(struct ring_buffer_event *event, unsigned type, unsigned length) { - event->type = type; + event->type_len = type; switch (type) { case RINGBUF_TYPE_PADDING: - break; - case RINGBUF_TYPE_TIME_EXTEND: - event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); - break; - case RINGBUF_TYPE_TIME_STAMP: - event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); break; - case RINGBUF_TYPE_DATA: + case 0: length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA) { - event->len = 0; + if (length > RB_MAX_SMALL_DATA) event->array[0] = length; - } else - event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); + else + event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); break; default: BUG(); @@ -1155,131 +1170,156 @@ static unsigned rb_calculate_event_length(unsigned length) return length; } + static struct ring_buffer_event * -__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, - unsigned type, unsigned long length, u64 *ts) +rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, + unsigned long length, unsigned long tail, + struct buffer_page *commit_page, + struct buffer_page *tail_page, u64 *ts) { - struct buffer_page *tail_page, *head_page, *reader_page, *commit_page; - unsigned long tail, write; + struct buffer_page *next_page, *head_page, *reader_page; struct ring_buffer *buffer = cpu_buffer->buffer; struct ring_buffer_event *event; - unsigned long flags; bool lock_taken = false; + unsigned long flags; - commit_page = cpu_buffer->commit_page; - /* we just need to protect against interrupts */ - barrier(); - tail_page = cpu_buffer->tail_page; - write = local_add_return(length, &tail_page->write); - tail = write - length; + next_page = tail_page; - /* See if we shot pass the end of this buffer page */ - if (write > BUF_PAGE_SIZE) { - struct buffer_page *next_page = tail_page; + local_irq_save(flags); + /* + * Since the write to the buffer is still not + * fully lockless, we must be careful with NMIs. + * The locks in the writers are taken when a write + * crosses to a new page. The locks protect against + * races with the readers (this will soon be fixed + * with a lockless solution). + * + * Because we can not protect against NMIs, and we + * want to keep traces reentrant, we need to manage + * what happens when we are in an NMI. + * + * NMIs can happen after we take the lock. + * If we are in an NMI, only take the lock + * if it is not already taken. Otherwise + * simply fail. + */ + if (unlikely(in_nmi())) { + if (!__raw_spin_trylock(&cpu_buffer->lock)) { + cpu_buffer->nmi_dropped++; + goto out_reset; + } + } else + __raw_spin_lock(&cpu_buffer->lock); - local_irq_save(flags); - /* - * Since the write to the buffer is still not - * fully lockless, we must be careful with NMIs. - * The locks in the writers are taken when a write - * crosses to a new page. The locks protect against - * races with the readers (this will soon be fixed - * with a lockless solution). - * - * Because we can not protect against NMIs, and we - * want to keep traces reentrant, we need to manage - * what happens when we are in an NMI. - * - * NMIs can happen after we take the lock. - * If we are in an NMI, only take the lock - * if it is not already taken. Otherwise - * simply fail. - */ - if (unlikely(in_nmi())) { - if (!__raw_spin_trylock(&cpu_buffer->lock)) - goto out_reset; - } else - __raw_spin_lock(&cpu_buffer->lock); + lock_taken = true; - lock_taken = true; + rb_inc_page(cpu_buffer, &next_page); - rb_inc_page(cpu_buffer, &next_page); + head_page = cpu_buffer->head_page; + reader_page = cpu_buffer->reader_page; - head_page = cpu_buffer->head_page; - reader_page = cpu_buffer->reader_page; + /* we grabbed the lock before incrementing */ + if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) + goto out_reset; - /* we grabbed the lock before incrementing */ - if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) - goto out_reset; + /* + * If for some reason, we had an interrupt storm that made + * it all the way around the buffer, bail, and warn + * about it. + */ + if (unlikely(next_page == commit_page)) { + cpu_buffer->commit_overrun++; + goto out_reset; + } - /* - * If for some reason, we had an interrupt storm that made - * it all the way around the buffer, bail, and warn - * about it. - */ - if (unlikely(next_page == commit_page)) { - WARN_ON_ONCE(1); + if (next_page == head_page) { + if (!(buffer->flags & RB_FL_OVERWRITE)) goto out_reset; - } - if (next_page == head_page) { - if (!(buffer->flags & RB_FL_OVERWRITE)) - goto out_reset; - - /* tail_page has not moved yet? */ - if (tail_page == cpu_buffer->tail_page) { - /* count overflows */ - rb_update_overflow(cpu_buffer); + /* tail_page has not moved yet? */ + if (tail_page == cpu_buffer->tail_page) { + /* count overflows */ + cpu_buffer->overrun += + local_read(&head_page->entries); - rb_inc_page(cpu_buffer, &head_page); - cpu_buffer->head_page = head_page; - cpu_buffer->head_page->read = 0; - } + rb_inc_page(cpu_buffer, &head_page); + cpu_buffer->head_page = head_page; + cpu_buffer->head_page->read = 0; } + } - /* - * If the tail page is still the same as what we think - * it is, then it is up to us to update the tail - * pointer. - */ - if (tail_page == cpu_buffer->tail_page) { - local_set(&next_page->write, 0); - local_set(&next_page->page->commit, 0); - cpu_buffer->tail_page = next_page; + /* + * If the tail page is still the same as what we think + * it is, then it is up to us to update the tail + * pointer. + */ + if (tail_page == cpu_buffer->tail_page) { + local_set(&next_page->write, 0); + local_set(&next_page->entries, 0); + local_set(&next_page->page->commit, 0); + cpu_buffer->tail_page = next_page; + + /* reread the time stamp */ + *ts = rb_time_stamp(buffer, cpu_buffer->cpu); + cpu_buffer->tail_page->page->time_stamp = *ts; + } - /* reread the time stamp */ - *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); - cpu_buffer->tail_page->page->time_stamp = *ts; - } + /* + * The actual tail page has moved forward. + */ + if (tail < BUF_PAGE_SIZE) { + /* Mark the rest of the page with padding */ + event = __rb_page_index(tail_page, tail); + rb_event_set_padding(event); + } - /* - * The actual tail page has moved forward. - */ - if (tail < BUF_PAGE_SIZE) { - /* Mark the rest of the page with padding */ - event = __rb_page_index(tail_page, tail); - rb_event_set_padding(event); - } + /* Set the write back to the previous setting */ + local_sub(length, &tail_page->write); - if (tail <= BUF_PAGE_SIZE) - /* Set the write back to the previous setting */ - local_set(&tail_page->write, tail); + /* + * If this was a commit entry that failed, + * increment that too + */ + if (tail_page == cpu_buffer->commit_page && + tail == rb_commit_index(cpu_buffer)) { + rb_set_commit_to_write(cpu_buffer); + } - /* - * If this was a commit entry that failed, - * increment that too - */ - if (tail_page == cpu_buffer->commit_page && - tail == rb_commit_index(cpu_buffer)) { - rb_set_commit_to_write(cpu_buffer); - } + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); + + /* fail and let the caller try again */ + return ERR_PTR(-EAGAIN); + + out_reset: + /* reset write */ + local_sub(length, &tail_page->write); + if (likely(lock_taken)) __raw_spin_unlock(&cpu_buffer->lock); - local_irq_restore(flags); + local_irq_restore(flags); + return NULL; +} - /* fail and let the caller try again */ - return ERR_PTR(-EAGAIN); - } +static struct ring_buffer_event * +__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, + unsigned type, unsigned long length, u64 *ts) +{ + struct buffer_page *tail_page, *commit_page; + struct ring_buffer_event *event; + unsigned long tail, write; + + commit_page = cpu_buffer->commit_page; + /* we just need to protect against interrupts */ + barrier(); + tail_page = cpu_buffer->tail_page; + write = local_add_return(length, &tail_page->write); + tail = write - length; + + /* See if we shot pass the end of this buffer page */ + if (write > BUF_PAGE_SIZE) + return rb_move_tail(cpu_buffer, length, tail, + commit_page, tail_page, ts); /* We reserved something on the buffer */ @@ -1289,6 +1329,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); rb_update_event(event, type, length); + /* The passed in type is zero for DATA */ + if (likely(!type)) + local_inc(&tail_page->entries); + /* * If this is a commit and the tail is zero, then update * this page's time stamp. @@ -1297,16 +1341,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, cpu_buffer->commit_page->page->time_stamp = *ts; return event; +} - out_reset: - /* reset write */ - if (tail <= BUF_PAGE_SIZE) - local_set(&tail_page->write, tail); +static inline int +rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + unsigned long new_index, old_index; + struct buffer_page *bpage; + unsigned long index; + unsigned long addr; - if (likely(lock_taken)) - __raw_spin_unlock(&cpu_buffer->lock); - local_irq_restore(flags); - return NULL; + new_index = rb_event_index(event); + old_index = new_index + rb_event_length(event); + addr = (unsigned long)event; + addr &= PAGE_MASK; + + bpage = cpu_buffer->tail_page; + + if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { + /* + * This is on the tail page. It is possible that + * a write could come in and move the tail page + * and write to the next page. That is fine + * because we just shorten what is on this page. + */ + index = local_cmpxchg(&bpage->write, old_index, new_index); + if (index == old_index) + return 1; + } + + /* could not discard */ + return 0; } static int @@ -1351,16 +1417,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, event->array[0] = *delta >> TS_SHIFT; } else { cpu_buffer->commit_page->page->time_stamp = *ts; - event->time_delta = 0; - event->array[0] = 0; + /* try to discard, since we do not need this */ + if (!rb_try_to_discard(cpu_buffer, event)) { + /* nope, just zero it */ + event->time_delta = 0; + event->array[0] = 0; + } } cpu_buffer->write_stamp = *ts; /* let the caller know this was the commit */ ret = 1; } else { - /* Darn, this is just wasted space */ - event->time_delta = 0; - event->array[0] = 0; + /* Try to discard the event */ + if (!rb_try_to_discard(cpu_buffer, event)) { + /* Darn, this is just wasted space */ + event->time_delta = 0; + event->array[0] = 0; + } ret = 0; } @@ -1371,13 +1444,14 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, static struct ring_buffer_event * rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, - unsigned type, unsigned long length) + unsigned long length) { struct ring_buffer_event *event; - u64 ts, delta; + u64 ts, delta = 0; int commit = 0; int nr_loops = 0; + length = rb_calculate_event_length(length); again: /* * We allow for interrupts to reenter here and do a trace. @@ -1391,7 +1465,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) return NULL; - ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); + ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); /* * Only the first commit can update the timestamp. @@ -1401,23 +1475,24 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, * also be made. But only the entry that did the actual * commit will be something other than zero. */ - if (cpu_buffer->tail_page == cpu_buffer->commit_page && - rb_page_write(cpu_buffer->tail_page) == - rb_commit_index(cpu_buffer)) { + if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && + rb_page_write(cpu_buffer->tail_page) == + rb_commit_index(cpu_buffer))) { + u64 diff; - delta = ts - cpu_buffer->write_stamp; + diff = ts - cpu_buffer->write_stamp; - /* make sure this delta is calculated here */ + /* make sure this diff is calculated here */ barrier(); /* Did the write stamp get updated already? */ if (unlikely(ts < cpu_buffer->write_stamp)) - delta = 0; + goto get_event; - if (test_time_stamp(delta)) { + delta = diff; + if (unlikely(test_time_stamp(delta))) { commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); - if (commit == -EBUSY) return NULL; @@ -1426,12 +1501,11 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, RB_WARN_ON(cpu_buffer, commit < 0); } - } else - /* Non commits have zero deltas */ - delta = 0; + } - event = __rb_reserve_next(cpu_buffer, type, length, &ts); - if (PTR_ERR(event) == -EAGAIN) + get_event: + event = __rb_reserve_next(cpu_buffer, 0, length, &ts); + if (unlikely(PTR_ERR(event) == -EAGAIN)) goto again; if (!event) { @@ -1448,7 +1522,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, * If the timestamp was commited, make the commit our entry * now so that we will update it when needed. */ - if (commit) + if (unlikely(commit)) rb_set_commit_event(cpu_buffer, event); else if (!rb_is_commit(cpu_buffer, event)) delta = 0; @@ -1458,6 +1532,36 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, return event; } +#define TRACE_RECURSIVE_DEPTH 16 + +static int trace_recursive_lock(void) +{ + current->trace_recursion++; + + if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) + return 0; + + /* Disable all tracing before we do anything else */ + tracing_off_permanent(); + + printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" + "HC[%lu]:SC[%lu]:NMI[%lu]\n", + current->trace_recursion, + hardirq_count() >> HARDIRQ_SHIFT, + softirq_count() >> SOFTIRQ_SHIFT, + in_nmi()); + + WARN_ON_ONCE(1); + return -1; +} + +static void trace_recursive_unlock(void) +{ + WARN_ON_ONCE(!current->trace_recursion); + + current->trace_recursion--; +} + static DEFINE_PER_CPU(int, rb_need_resched); /** @@ -1491,6 +1595,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) /* If we are tracing schedule, we don't want to recurse */ resched = ftrace_preempt_disable(); + if (trace_recursive_lock()) + goto out_nocheck; + cpu = raw_smp_processor_id(); if (!cpumask_test_cpu(cpu, buffer->cpumask)) @@ -1501,11 +1608,10 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) if (atomic_read(&cpu_buffer->record_disabled)) goto out; - length = rb_calculate_event_length(length); - if (length > BUF_PAGE_SIZE) + if (length > BUF_MAX_DATA_SIZE) goto out; - event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); + event = rb_reserve_next_event(cpu_buffer, length); if (!event) goto out; @@ -1520,6 +1626,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) return event; out: + trace_recursive_unlock(); + + out_nocheck: ftrace_preempt_enable(resched); return NULL; } @@ -1528,7 +1637,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { - cpu_buffer->entries++; + local_inc(&cpu_buffer->entries); /* Only process further if we own the commit */ if (!rb_is_commit(cpu_buffer, event)) @@ -1558,6 +1667,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, rb_commit(cpu_buffer, event); + trace_recursive_unlock(); + /* * Only the last preempt count needs to restore preemption. */ @@ -1570,6 +1681,99 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); +static inline void rb_event_discard(struct ring_buffer_event *event) +{ + /* array[0] holds the actual length for the discarded event */ + event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; + event->type_len = RINGBUF_TYPE_PADDING; + /* time delta must be non zero */ + if (!event->time_delta) + event->time_delta = 1; +} + +/** + * ring_buffer_event_discard - discard any event in the ring buffer + * @event: the event to discard + * + * Sometimes a event that is in the ring buffer needs to be ignored. + * This function lets the user discard an event in the ring buffer + * and then that event will not be read later. + * + * Note, it is up to the user to be careful with this, and protect + * against races. If the user discards an event that has been consumed + * it is possible that it could corrupt the ring buffer. + */ +void ring_buffer_event_discard(struct ring_buffer_event *event) +{ + rb_event_discard(event); +} +EXPORT_SYMBOL_GPL(ring_buffer_event_discard); + +/** + * ring_buffer_commit_discard - discard an event that has not been committed + * @buffer: the ring buffer + * @event: non committed event to discard + * + * This is similar to ring_buffer_event_discard but must only be + * performed on an event that has not been committed yet. The difference + * is that this will also try to free the event from the ring buffer + * if another event has not been added behind it. + * + * If another event has been added behind it, it will set the event + * up as discarded, and perform the commit. + * + * If this function is called, do not call ring_buffer_unlock_commit on + * the event. + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + struct ring_buffer_per_cpu *cpu_buffer; + int cpu; + + /* The event is discarded regardless */ + rb_event_discard(event); + + /* + * This must only be called if the event has not been + * committed yet. Thus we can assume that preemption + * is still disabled. + */ + RB_WARN_ON(buffer, preemptible()); + + cpu = smp_processor_id(); + cpu_buffer = buffer->buffers[cpu]; + + if (!rb_try_to_discard(cpu_buffer, event)) + goto out; + + /* + * The commit is still visible by the reader, so we + * must increment entries. + */ + local_inc(&cpu_buffer->entries); + out: + /* + * If a write came in and pushed the tail page + * we still need to update the commit pointer + * if we were the commit. + */ + if (rb_is_commit(cpu_buffer, event)) + rb_set_commit_to_write(cpu_buffer); + + trace_recursive_unlock(); + + /* + * Only the last preempt count needs to restore preemption. + */ + if (preempt_count() == 1) + ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); + else + preempt_enable_no_resched_notrace(); + +} +EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); + /** * ring_buffer_write - write data to the buffer without reserving * @buffer: The ring buffer to write to. @@ -1589,7 +1793,6 @@ int ring_buffer_write(struct ring_buffer *buffer, { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; - unsigned long event_length; void *body; int ret = -EBUSY; int cpu, resched; @@ -1612,9 +1815,10 @@ int ring_buffer_write(struct ring_buffer *buffer, if (atomic_read(&cpu_buffer->record_disabled)) goto out; - event_length = rb_calculate_event_length(length); - event = rb_reserve_next_event(cpu_buffer, - RINGBUF_TYPE_DATA, event_length); + if (length > BUF_MAX_DATA_SIZE) + goto out; + + event = rb_reserve_next_event(cpu_buffer, length); if (!event) goto out; @@ -1728,7 +1932,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = cpu_buffer->entries; + ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) + - cpu_buffer->read; return ret; } @@ -1755,6 +1960,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); /** + * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of overruns from + */ +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + ret = cpu_buffer->nmi_dropped; + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); + +/** + * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of overruns from + */ +unsigned long +ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + ret = cpu_buffer->commit_overrun; + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); + +/** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer * @@ -1770,7 +2016,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - entries += cpu_buffer->entries; + entries += (local_read(&cpu_buffer->entries) - + cpu_buffer->overrun) - cpu_buffer->read; } return entries; @@ -1862,7 +2109,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, { u64 delta; - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; @@ -1893,7 +2140,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, { u64 delta; - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; @@ -1966,6 +2213,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->list.prev = reader->list.prev; local_set(&cpu_buffer->reader_page->write, 0); + local_set(&cpu_buffer->reader_page->entries, 0); local_set(&cpu_buffer->reader_page->page->commit, 0); /* Make the reader page now replace the head */ @@ -2008,8 +2256,9 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) event = rb_reader_event(cpu_buffer); - if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) - cpu_buffer->entries--; + if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX + || rb_discarded_event(event)) + cpu_buffer->read++; rb_update_read_stamp(cpu_buffer, event); @@ -2031,8 +2280,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) * Check if we are at the end of the buffer. */ if (iter->head >= rb_page_size(iter->head_page)) { - if (RB_WARN_ON(buffer, - iter->head_page == cpu_buffer->commit_page)) + /* discarded commits can make the page empty */ + if (iter->head_page == cpu_buffer->commit_page) return; rb_inc_iter(iter); return; @@ -2075,12 +2324,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) /* * We repeat when a timestamp is encountered. It is possible * to get multiple timestamps from an interrupt entering just - * as one timestamp is about to be written. The max times - * that this can happen is the number of nested interrupts we - * can have. Nesting 10 deep of interrupts is clearly - * an anomaly. + * as one timestamp is about to be written, or from discarded + * commits. The most that we can have is the number on a single page. */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) return NULL; reader = rb_get_reader_page(cpu_buffer); @@ -2089,7 +2336,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) event = rb_reader_event(cpu_buffer); - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) RB_WARN_ON(cpu_buffer, 1); @@ -2146,14 +2393,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) again: /* - * We repeat when a timestamp is encountered. It is possible - * to get multiple timestamps from an interrupt entering just - * as one timestamp is about to be written. The max times - * that this can happen is the number of nested interrupts we - * can have. Nesting 10 deep of interrupts is clearly - * an anomaly. + * We repeat when a timestamp is encountered. + * We can get multiple timestamps by nested interrupts or also + * if filtering is on (discarding commits). Since discarding + * commits can be frequent we can get a lot of timestamps. + * But we limit them by not adding timestamps if they begin + * at the start of a page. */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) return NULL; if (rb_per_cpu_empty(cpu_buffer)) @@ -2161,7 +2408,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) event = rb_iter_head_event(iter); - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) { rb_inc_iter(iter); @@ -2220,7 +2467,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) event = rb_buffer_peek(buffer, cpu, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2248,7 +2495,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) event = rb_iter_peek(iter, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2293,7 +2540,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) out: preempt_enable(); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2386,7 +2633,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2411,6 +2658,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->head_page = list_entry(cpu_buffer->pages.next, struct buffer_page, list); local_set(&cpu_buffer->head_page->write, 0); + local_set(&cpu_buffer->head_page->entries, 0); local_set(&cpu_buffer->head_page->page->commit, 0); cpu_buffer->head_page->read = 0; @@ -2420,11 +2668,15 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) INIT_LIST_HEAD(&cpu_buffer->reader_page->list); local_set(&cpu_buffer->reader_page->write, 0); + local_set(&cpu_buffer->reader_page->entries, 0); local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->read = 0; + cpu_buffer->nmi_dropped = 0; + cpu_buffer->commit_overrun = 0; cpu_buffer->overrun = 0; - cpu_buffer->entries = 0; + cpu_buffer->read = 0; + local_set(&cpu_buffer->entries, 0); cpu_buffer->write_stamp = 0; cpu_buffer->read_stamp = 0; @@ -2443,6 +2695,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; + atomic_inc(&cpu_buffer->record_disabled); + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); __raw_spin_lock(&cpu_buffer->lock); @@ -2452,6 +2706,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) __raw_spin_unlock(&cpu_buffer->lock); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + atomic_dec(&cpu_buffer->record_disabled); } EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); @@ -2578,28 +2834,6 @@ out: } EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); -static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_data_page *bpage, - unsigned int offset) -{ - struct ring_buffer_event *event; - unsigned long head; - - __raw_spin_lock(&cpu_buffer->lock); - for (head = offset; head < local_read(&bpage->commit); - head += rb_event_length(event)) { - - event = __rb_data_page_index(bpage, head); - if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) - return; - /* Only count data entries */ - if (event->type != RINGBUF_TYPE_DATA) - continue; - cpu_buffer->entries--; - } - __raw_spin_unlock(&cpu_buffer->lock); -} - /** * ring_buffer_alloc_read_page - allocate a page to read from buffer * @buffer: the buffer to allocate for. @@ -2630,6 +2864,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) return bpage; } +EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); /** * ring_buffer_free_read_page - free an allocated read page @@ -2642,6 +2877,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) { free_page((unsigned long)data); } +EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); /** * ring_buffer_read_page - extract a page from the ring buffer @@ -2768,16 +3004,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer, /* we copied everything to the beginning */ read = 0; } else { + /* update the entry counter */ + cpu_buffer->read += local_read(&reader->entries); + /* swap the pages */ rb_init_page(bpage); bpage = reader->page; reader->page = *data_page; local_set(&reader->write, 0); + local_set(&reader->entries, 0); reader->read = 0; *data_page = bpage; - - /* update the entry counter */ - rb_remove_entries(cpu_buffer, bpage, read); } ret = read; @@ -2787,6 +3024,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, out: return ret; } +EXPORT_SYMBOL_GPL(ring_buffer_read_page); static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, @@ -2845,14 +3083,11 @@ static const struct file_operations rb_simple_fops = { static __init int rb_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_on", 0644, d_tracer, - &ring_buffer_flags, &rb_simple_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_on' entry\n"); + trace_create_file("tracing_on", 0644, d_tracer, + &ring_buffer_flags, &rb_simple_fops); return 0; } diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c new file mode 100644 index 000000000000..8d68e149a8b3 --- /dev/null +++ b/kernel/trace/ring_buffer_benchmark.c @@ -0,0 +1,416 @@ +/* + * ring buffer tester and benchmark + * + * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com> + */ +#include <linux/ring_buffer.h> +#include <linux/completion.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/time.h> + +struct rb_page { + u64 ts; + local_t commit; + char data[4080]; +}; + +/* run time and sleep time in seconds */ +#define RUN_TIME 10 +#define SLEEP_TIME 10 + +/* number of events for writer to wake up the reader */ +static int wakeup_interval = 100; + +static int reader_finish; +static struct completion read_start; +static struct completion read_done; + +static struct ring_buffer *buffer; +static struct task_struct *producer; +static struct task_struct *consumer; +static unsigned long read; + +static int disable_reader; +module_param(disable_reader, uint, 0644); +MODULE_PARM_DESC(disable_reader, "only run producer"); + +static int read_events; + +static int kill_test; + +#define KILL_TEST() \ + do { \ + if (!kill_test) { \ + kill_test = 1; \ + WARN_ON(1); \ + } \ + } while (0) + +enum event_status { + EVENT_FOUND, + EVENT_DROPPED, +}; + +static enum event_status read_event(int cpu) +{ + struct ring_buffer_event *event; + int *entry; + u64 ts; + + event = ring_buffer_consume(buffer, cpu, &ts); + if (!event) + return EVENT_DROPPED; + + entry = ring_buffer_event_data(event); + if (*entry != cpu) { + KILL_TEST(); + return EVENT_DROPPED; + } + + read++; + return EVENT_FOUND; +} + +static enum event_status read_page(int cpu) +{ + struct ring_buffer_event *event; + struct rb_page *rpage; + unsigned long commit; + void *bpage; + int *entry; + int ret; + int inc; + int i; + + bpage = ring_buffer_alloc_read_page(buffer); + if (!bpage) + return EVENT_DROPPED; + + ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); + if (ret >= 0) { + rpage = bpage; + commit = local_read(&rpage->commit); + for (i = 0; i < commit && !kill_test; i += inc) { + + if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { + KILL_TEST(); + break; + } + + inc = -1; + event = (void *)&rpage->data[i]; + switch (event->type_len) { + case RINGBUF_TYPE_PADDING: + /* We don't expect any padding */ + KILL_TEST(); + break; + case RINGBUF_TYPE_TIME_EXTEND: + inc = 8; + break; + case 0: + entry = ring_buffer_event_data(event); + if (*entry != cpu) { + KILL_TEST(); + break; + } + read++; + if (!event->array[0]) { + KILL_TEST(); + break; + } + inc = event->array[0]; + break; + default: + entry = ring_buffer_event_data(event); + if (*entry != cpu) { + KILL_TEST(); + break; + } + read++; + inc = ((event->type_len + 1) * 4); + } + if (kill_test) + break; + + if (inc <= 0) { + KILL_TEST(); + break; + } + } + } + ring_buffer_free_read_page(buffer, bpage); + + if (ret < 0) + return EVENT_DROPPED; + return EVENT_FOUND; +} + +static void ring_buffer_consumer(void) +{ + /* toggle between reading pages and events */ + read_events ^= 1; + + read = 0; + while (!reader_finish && !kill_test) { + int found; + + do { + int cpu; + + found = 0; + for_each_online_cpu(cpu) { + enum event_status stat; + + if (read_events) + stat = read_event(cpu); + else + stat = read_page(cpu); + + if (kill_test) + break; + if (stat == EVENT_FOUND) + found = 1; + } + } while (found && !kill_test); + + set_current_state(TASK_INTERRUPTIBLE); + if (reader_finish) + break; + + schedule(); + __set_current_state(TASK_RUNNING); + } + reader_finish = 0; + complete(&read_done); +} + +static void ring_buffer_producer(void) +{ + struct timeval start_tv; + struct timeval end_tv; + unsigned long long time; + unsigned long long entries; + unsigned long long overruns; + unsigned long missed = 0; + unsigned long hit = 0; + unsigned long avg; + int cnt = 0; + + /* + * Hammer the buffer for 10 secs (this may + * make the system stall) + */ + pr_info("Starting ring buffer hammer\n"); + do_gettimeofday(&start_tv); + do { + struct ring_buffer_event *event; + int *entry; + + event = ring_buffer_lock_reserve(buffer, 10); + if (!event) { + missed++; + } else { + hit++; + entry = ring_buffer_event_data(event); + *entry = smp_processor_id(); + ring_buffer_unlock_commit(buffer, event); + } + do_gettimeofday(&end_tv); + + cnt++; + if (consumer && !(cnt % wakeup_interval)) + wake_up_process(consumer); + +#ifndef CONFIG_PREEMPT + /* + * If we are a non preempt kernel, the 10 second run will + * stop everything while it runs. Instead, we will call + * cond_resched and also add any time that was lost by a + * rescedule. + * + * Do a cond resched at the same frequency we would wake up + * the reader. + */ + if (cnt % wakeup_interval) + cond_resched(); +#endif + + } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); + pr_info("End ring buffer hammer\n"); + + if (consumer) { + /* Init both completions here to avoid races */ + init_completion(&read_start); + init_completion(&read_done); + /* the completions must be visible before the finish var */ + smp_wmb(); + reader_finish = 1; + /* finish var visible before waking up the consumer */ + smp_wmb(); + wake_up_process(consumer); + wait_for_completion(&read_done); + } + + time = end_tv.tv_sec - start_tv.tv_sec; + time *= USEC_PER_SEC; + time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec); + + entries = ring_buffer_entries(buffer); + overruns = ring_buffer_overruns(buffer); + + if (kill_test) + pr_info("ERROR!\n"); + pr_info("Time: %lld (usecs)\n", time); + pr_info("Overruns: %lld\n", overruns); + if (disable_reader) + pr_info("Read: (reader disabled)\n"); + else + pr_info("Read: %ld (by %s)\n", read, + read_events ? "events" : "pages"); + pr_info("Entries: %lld\n", entries); + pr_info("Total: %lld\n", entries + overruns + read); + pr_info("Missed: %ld\n", missed); + pr_info("Hit: %ld\n", hit); + + /* Convert time from usecs to millisecs */ + do_div(time, USEC_PER_MSEC); + if (time) + hit /= (long)time; + else + pr_info("TIME IS ZERO??\n"); + + pr_info("Entries per millisec: %ld\n", hit); + + if (hit) { + /* Calculate the average time in nanosecs */ + avg = NSEC_PER_MSEC / hit; + pr_info("%ld ns per entry\n", avg); + } + + if (missed) { + if (time) + missed /= (long)time; + + pr_info("Total iterations per millisec: %ld\n", hit + missed); + + /* it is possible that hit + missed will overflow and be zero */ + if (!(hit + missed)) { + pr_info("hit + missed overflowed and totalled zero!\n"); + hit--; /* make it non zero */ + } + + /* Caculate the average time in nanosecs */ + avg = NSEC_PER_MSEC / (hit + missed); + pr_info("%ld ns per entry\n", avg); + } +} + +static void wait_to_die(void) +{ + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); +} + +static int ring_buffer_consumer_thread(void *arg) +{ + while (!kthread_should_stop() && !kill_test) { + complete(&read_start); + + ring_buffer_consumer(); + + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop() || kill_test) + break; + + schedule(); + __set_current_state(TASK_RUNNING); + } + __set_current_state(TASK_RUNNING); + + if (kill_test) + wait_to_die(); + + return 0; +} + +static int ring_buffer_producer_thread(void *arg) +{ + init_completion(&read_start); + + while (!kthread_should_stop() && !kill_test) { + ring_buffer_reset(buffer); + + if (consumer) { + smp_wmb(); + wake_up_process(consumer); + wait_for_completion(&read_start); + } + + ring_buffer_producer(); + + pr_info("Sleeping for 10 secs\n"); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ * SLEEP_TIME); + __set_current_state(TASK_RUNNING); + } + + if (kill_test) + wait_to_die(); + + return 0; +} + +static int __init ring_buffer_benchmark_init(void) +{ + int ret; + + /* make a one meg buffer in overwite mode */ + buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE); + if (!buffer) + return -ENOMEM; + + if (!disable_reader) { + consumer = kthread_create(ring_buffer_consumer_thread, + NULL, "rb_consumer"); + ret = PTR_ERR(consumer); + if (IS_ERR(consumer)) + goto out_fail; + } + + producer = kthread_run(ring_buffer_producer_thread, + NULL, "rb_producer"); + ret = PTR_ERR(producer); + + if (IS_ERR(producer)) + goto out_kill; + + return 0; + + out_kill: + if (consumer) + kthread_stop(consumer); + + out_fail: + ring_buffer_free(buffer); + return ret; +} + +static void __exit ring_buffer_benchmark_exit(void) +{ + kthread_stop(producer); + if (consumer) + kthread_stop(consumer); + ring_buffer_free(buffer); +} + +module_init(ring_buffer_benchmark_init); +module_exit(ring_buffer_benchmark_exit); + +MODULE_AUTHOR("Steven Rostedt"); +MODULE_DESCRIPTION("ring_buffer_benchmark"); +MODULE_LICENSE("GPL"); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cda81ec58d9f..8acd9b81a5d7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -171,6 +171,13 @@ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); +int filter_current_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer_event *event) +{ + return filter_check_discard(call, rec, global_trace.buffer, event); +} +EXPORT_SYMBOL_GPL(filter_current_check_discard); + cycle_t ftrace_now(int cpu) { u64 ts; @@ -255,7 +262,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | + TRACE_ITER_GRAPH_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -317,6 +325,7 @@ static const char *trace_options[] = { "latency-format", "global-clock", "sleep-time", + "graph-time", NULL }; @@ -402,17 +411,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) return cnt; } -static void -trace_print_seq(struct seq_file *m, struct trace_seq *s) -{ - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; - - s->buffer[len] = 0; - seq_puts(m, s->buffer); - - trace_seq_init(s); -} - /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer @@ -641,6 +639,16 @@ void tracing_reset_online_cpus(struct trace_array *tr) tracing_reset(tr, cpu); } +void tracing_reset_current(int cpu) +{ + tracing_reset(&global_trace, cpu); +} + +void tracing_reset_current_online_cpus(void) +{ + tracing_reset_online_cpus(&global_trace); +} + #define SAVED_CMDLINES 128 #define NO_CMDLINE_MAP UINT_MAX static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; @@ -800,6 +808,7 @@ void trace_find_cmdline(int pid, char comm[]) return; } + preempt_disable(); __raw_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; if (map != NO_CMDLINE_MAP) @@ -808,6 +817,7 @@ void trace_find_cmdline(int pid, char comm[]) strcpy(comm, "<...>"); __raw_spin_unlock(&trace_cmdline_lock); + preempt_enable(); } void tracing_record_cmdline(struct task_struct *tsk) @@ -840,7 +850,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc) { @@ -883,30 +893,40 @@ void trace_buffer_unlock_commit(struct trace_array *tr, } struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc) { return trace_buffer_lock_reserve(&global_trace, type, len, flags, pc); } +EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); + __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); } +EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); + __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); +} +EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); + +void trace_current_buffer_discard_commit(struct ring_buffer_event *event) +{ + ring_buffer_discard_commit(global_trace.buffer, event); } +EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_function; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -921,7 +941,9 @@ trace_function(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; - ring_buffer_unlock_commit(tr->buffer, event); + + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -930,6 +952,7 @@ static int __trace_graph_entry(struct trace_array *tr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; struct ftrace_graph_ent_entry *entry; @@ -942,7 +965,8 @@ static int __trace_graph_entry(struct trace_array *tr, return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event); + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(global_trace.buffer, event); return 1; } @@ -952,6 +976,7 @@ static void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; struct ftrace_graph_ret_entry *entry; @@ -964,7 +989,8 @@ static void __trace_graph_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); entry->ret = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event); + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -982,6 +1008,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, int skip, int pc) { #ifdef CONFIG_STACKTRACE + struct ftrace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -999,7 +1026,8 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1024,6 +1052,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc) { #ifdef CONFIG_STACKTRACE + struct ftrace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; @@ -1045,7 +1074,8 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1089,6 +1119,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_context_switch; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -1104,7 +1135,9 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); - trace_buffer_unlock_commit(tr, event, flags, pc); + + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -1113,6 +1146,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *curr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -1129,7 +1163,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); } @@ -1230,11 +1265,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static u32 trace_buf[TRACE_BUF_SIZE]; + struct ftrace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; struct bprint_entry *entry; unsigned long flags; + int disable; int resched; int cpu, len = 0, size, pc; @@ -1249,7 +1286,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(atomic_read(&data->disabled))) + disable = atomic_inc_return(&data->disabled); + if (unlikely(disable != 1)) goto out; /* Lockdep uses trace_printk for lock tracing */ @@ -1269,13 +1307,15 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); local_irq_restore(flags); out: + atomic_dec_return(&data->disabled); ftrace_preempt_enable(resched); unpause_graph_tracing(); @@ -1288,12 +1328,14 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; + struct ftrace_event_call *call = &event_print; struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; int cpu, len = 0, size, pc; struct print_entry *entry; unsigned long irq_flags; + int disable; if (tracing_disabled || tracing_selftest_running) return 0; @@ -1303,7 +1345,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(atomic_read(&data->disabled))) + disable = atomic_inc_return(&data->disabled); + if (unlikely(disable != 1)) goto out; pause_graph_tracing(); @@ -1323,13 +1366,15 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); raw_local_irq_restore(irq_flags); unpause_graph_tracing(); out: + atomic_dec_return(&data->disabled); preempt_enable_notrace(); return len; @@ -1526,12 +1571,14 @@ static void *s_start(struct seq_file *m, loff_t *pos) p = s_next(m, p, &l); } + trace_event_read_lock(); return p; } static void s_stop(struct seq_file *m, void *p) { atomic_dec(&trace_record_cmdline_disabled); + trace_event_read_unlock(); } static void print_lat_help_header(struct seq_file *m) @@ -1774,6 +1821,7 @@ static int trace_empty(struct trace_iterator *iter) return 1; } +/* Called with trace_event_read_lock() held. */ static enum print_line_t print_trace_line(struct trace_iterator *iter) { enum print_line_t ret; @@ -2397,6 +2445,56 @@ static const struct file_operations tracing_readme_fops = { }; static ssize_t +tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf_comm; + char *file_buf; + char *buf; + int len = 0; + int pid; + int i; + + file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL); + if (!file_buf) + return -ENOMEM; + + buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL); + if (!buf_comm) { + kfree(file_buf); + return -ENOMEM; + } + + buf = file_buf; + + for (i = 0; i < SAVED_CMDLINES; i++) { + int r; + + pid = map_cmdline_to_pid[i]; + if (pid == -1 || pid == NO_CMDLINE_MAP) + continue; + + trace_find_cmdline(pid, buf_comm); + r = sprintf(buf, "%d %s\n", pid, buf_comm); + buf += r; + len += r; + } + + len = simple_read_from_buffer(ubuf, cnt, ppos, + file_buf, len); + + kfree(file_buf); + kfree(buf_comm); + + return len; +} + +static const struct file_operations tracing_saved_cmdlines_fops = { + .open = tracing_open_generic, + .read = tracing_saved_cmdlines_read, +}; + +static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -2728,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) /* trace pipe does not show start of buffer */ cpumask_setall(iter->started); + if (trace_flags & TRACE_ITER_LATENCY_FMT) + iter->iter_flags |= TRACE_FILE_LAT_FMT; + iter->cpu_file = cpu_file; iter->tr = &global_trace; mutex_init(&iter->mutex); @@ -2915,6 +3016,7 @@ waitagain: offsetof(struct trace_iterator, seq)); iter->pos = -1; + trace_event_read_lock(); while (find_next_entry_inc(iter) != NULL) { enum print_line_t ret; int len = iter->seq.len; @@ -2931,6 +3033,7 @@ waitagain: if (iter->seq.len >= cnt) break; } + trace_event_read_unlock(); /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); @@ -3053,6 +3156,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, goto out_err; } + trace_event_read_lock(); + /* Fill as many pages as possible. */ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -3075,6 +3180,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_seq_init(&iter->seq); } + trace_event_read_unlock(); mutex_unlock(&iter->mutex); spd.nr_pages = i; @@ -3425,7 +3531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int size, i; + int entries, size, i; size_t ret; if (*ppos & (PAGE_SIZE - 1)) { @@ -3440,7 +3546,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, len &= PAGE_MASK; } - for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); + + for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { struct page *page; int r; @@ -3457,7 +3565,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } r = ring_buffer_read_page(ref->buffer, &ref->page, - len, info->cpu, 0); + len, info->cpu, 1); if (r < 0) { ring_buffer_free_read_page(ref->buffer, ref->page); @@ -3481,6 +3589,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; *ppos += PAGE_SIZE; + + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); } spd.nr_pages = i; @@ -3508,6 +3618,45 @@ static const struct file_operations tracing_buffers_fops = { .llseek = no_llseek, }; +static ssize_t +tracing_stats_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + unsigned long cpu = (unsigned long)filp->private_data; + struct trace_array *tr = &global_trace; + struct trace_seq *s; + unsigned long cnt; + + s = kmalloc(sizeof(*s), GFP_ATOMIC); + if (!s) + return ENOMEM; + + trace_seq_init(s); + + cnt = ring_buffer_entries_cpu(tr->buffer, cpu); + trace_seq_printf(s, "entries: %ld\n", cnt); + + cnt = ring_buffer_overrun_cpu(tr->buffer, cpu); + trace_seq_printf(s, "overrun: %ld\n", cnt); + + cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); + trace_seq_printf(s, "commit overrun: %ld\n", cnt); + + cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu); + trace_seq_printf(s, "nmi dropped: %ld\n", cnt); + + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); + + kfree(s); + + return count; +} + +static const struct file_operations tracing_stats_fops = { + .open = tracing_open_generic, + .read = tracing_stats_read, +}; + #ifdef CONFIG_DYNAMIC_FTRACE int __weak ftrace_arch_read_dyn_info(char *buf, int size) @@ -3597,7 +3746,7 @@ struct dentry *tracing_dentry_percpu(void) static void tracing_init_debugfs_percpu(long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(); - struct dentry *entry, *d_cpu; + struct dentry *d_cpu; /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ char cpu_dir[7]; @@ -3612,21 +3761,18 @@ static void tracing_init_debugfs_percpu(long cpu) } /* per cpu trace_pipe */ - entry = debugfs_create_file("trace_pipe", 0444, d_cpu, - (void *) cpu, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe' entry\n"); + trace_create_file("trace_pipe", 0444, d_cpu, + (void *) cpu, &tracing_pipe_fops); /* per cpu trace */ - entry = debugfs_create_file("trace", 0644, d_cpu, - (void *) cpu, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); + trace_create_file("trace", 0644, d_cpu, + (void *) cpu, &tracing_fops); + + trace_create_file("trace_pipe_raw", 0444, d_cpu, + (void *) cpu, &tracing_buffers_fops); - entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, - (void *) cpu, &tracing_buffers_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n"); + trace_create_file("stats", 0444, d_cpu, + (void *) cpu, &tracing_stats_fops); } #ifdef CONFIG_FTRACE_SELFTEST @@ -3782,6 +3928,22 @@ static const struct file_operations trace_options_core_fops = { .write = trace_options_core_write, }; +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + struct dentry *ret; + + ret = debugfs_create_file(name, mode, parent, data, fops); + if (!ret) + pr_warning("Could not create debugfs '%s' entry\n", name); + + return ret; +} + + static struct dentry *trace_options_init_dentry(void) { struct dentry *d_tracer; @@ -3809,7 +3971,6 @@ create_trace_option_file(struct trace_option_dentry *topt, struct tracer_opt *opt) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) @@ -3818,11 +3979,9 @@ create_trace_option_file(struct trace_option_dentry *topt, topt->flags = flags; topt->opt = opt; - entry = debugfs_create_file(opt->name, 0644, t_options, topt, + topt->entry = trace_create_file(opt->name, 0644, t_options, topt, &trace_options_fops); - topt->entry = entry; - } static struct trace_option_dentry * @@ -3877,123 +4036,84 @@ static struct dentry * create_trace_option_core_file(const char *option, long index) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) return NULL; - entry = debugfs_create_file(option, 0644, t_options, (void *)index, + return trace_create_file(option, 0644, t_options, (void *)index, &trace_options_core_fops); - - return entry; } static __init void create_trace_options_dir(void) { struct dentry *t_options; - struct dentry *entry; int i; t_options = trace_options_init_dentry(); if (!t_options) return; - for (i = 0; trace_options[i]; i++) { - entry = create_trace_option_core_file(trace_options[i], i); - if (!entry) - pr_warning("Could not create debugfs %s entry\n", - trace_options[i]); - } + for (i = 0; trace_options[i]; i++) + create_trace_option_core_file(trace_options[i], i); } static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; int cpu; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, - &global_trace, &tracing_ctrl_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); + trace_create_file("tracing_enabled", 0644, d_tracer, + &global_trace, &tracing_ctrl_fops); - entry = debugfs_create_file("trace_options", 0644, d_tracer, - NULL, &tracing_iter_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_options' entry\n"); + trace_create_file("trace_options", 0644, d_tracer, + NULL, &tracing_iter_fops); - create_trace_options_dir(); + trace_create_file("tracing_cpumask", 0644, d_tracer, + NULL, &tracing_cpumask_fops); + + trace_create_file("trace", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); - entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, - NULL, &tracing_cpumask_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); - - entry = debugfs_create_file("trace", 0644, d_tracer, - (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); - - entry = debugfs_create_file("available_tracers", 0444, d_tracer, - &global_trace, &show_traces_fops); - if (!entry) - pr_warning("Could not create debugfs 'available_tracers' entry\n"); - - entry = debugfs_create_file("current_tracer", 0444, d_tracer, - &global_trace, &set_tracer_fops); - if (!entry) - pr_warning("Could not create debugfs 'current_tracer' entry\n"); - - entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, - &tracing_max_latency, - &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_max_latency' entry\n"); - - entry = debugfs_create_file("tracing_thresh", 0644, d_tracer, - &tracing_thresh, &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_thresh' entry\n"); - entry = debugfs_create_file("README", 0644, d_tracer, - NULL, &tracing_readme_fops); - if (!entry) - pr_warning("Could not create debugfs 'README' entry\n"); - - entry = debugfs_create_file("trace_pipe", 0444, d_tracer, + trace_create_file("available_tracers", 0444, d_tracer, + &global_trace, &show_traces_fops); + + trace_create_file("current_tracer", 0644, d_tracer, + &global_trace, &set_tracer_fops); + + trace_create_file("tracing_max_latency", 0644, d_tracer, + &tracing_max_latency, &tracing_max_lat_fops); + + trace_create_file("tracing_thresh", 0644, d_tracer, + &tracing_thresh, &tracing_max_lat_fops); + + trace_create_file("README", 0444, d_tracer, + NULL, &tracing_readme_fops); + + trace_create_file("trace_pipe", 0444, d_tracer, (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_pipe' entry\n"); - - entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, - &global_trace, &tracing_entries_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'buffer_size_kb' entry\n"); - - entry = debugfs_create_file("trace_marker", 0220, d_tracer, - NULL, &tracing_mark_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_marker' entry\n"); + + trace_create_file("buffer_size_kb", 0644, d_tracer, + &global_trace, &tracing_entries_fops); + + trace_create_file("trace_marker", 0220, d_tracer, + NULL, &tracing_mark_fops); + + trace_create_file("saved_cmdlines", 0444, d_tracer, + NULL, &tracing_saved_cmdlines_fops); #ifdef CONFIG_DYNAMIC_FTRACE - entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, - &ftrace_update_tot_cnt, - &tracing_dyn_info_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'dyn_ftrace_total_info' entry\n"); + trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, + &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif #ifdef CONFIG_SYSPROF_TRACER init_tracer_sysprof_debugfs(d_tracer); #endif + create_trace_options_dir(); + for_each_tracing_cpu(cpu) tracing_init_debugfs_percpu(cpu); @@ -4064,7 +4184,8 @@ trace_printk_seq(struct trace_seq *s) static void __ftrace_dump(bool disable_tracing) { - static DEFINE_SPINLOCK(ftrace_dump_lock); + static raw_spinlock_t ftrace_dump_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; unsigned int old_userobj; @@ -4073,7 +4194,8 @@ static void __ftrace_dump(bool disable_tracing) int cnt = 0, cpu; /* only one dump */ - spin_lock_irqsave(&ftrace_dump_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&ftrace_dump_lock); if (dump_ran) goto out; @@ -4145,7 +4267,8 @@ static void __ftrace_dump(bool disable_tracing) } out: - spin_unlock_irqrestore(&ftrace_dump_lock, flags); + __raw_spin_unlock(&ftrace_dump_lock); + local_irq_restore(flags); } /* By default: disable tracing after the dump */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e685ac2b2ba1..6e735d4771f8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,9 +9,12 @@ #include <linux/mmiotrace.h> #include <linux/ftrace.h> #include <trace/boot.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> #include <trace/power.h> +#include <linux/trace_seq.h> +#include <linux/ftrace_event.h> + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -42,20 +45,6 @@ enum trace_type { }; /* - * The trace entry - the most basic unit of tracing. This is what - * is printed in the end as a single line in the trace output, such as: - * - * bash-15816 [01] 235.197585: idle_cpu <- irq_enter - */ -struct trace_entry { - unsigned char type; - unsigned char flags; - unsigned char preempt_count; - int pid; - int tgid; -}; - -/* * Function trace entry - function address and parent function addres: */ struct ftrace_entry { @@ -263,8 +252,6 @@ struct trace_array_cpu { char comm[TASK_COMM_LEN]; }; -struct trace_iterator; - /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. @@ -339,15 +326,6 @@ extern void __ftrace_bad_type(void); __ftrace_bad_type(); \ } while (0) -/* Return values for print_line callback */ -enum print_line_t { - TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ - TRACE_TYPE_HANDLED = 1, - TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ - TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ -}; - - /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the @@ -423,60 +401,30 @@ struct tracer { struct tracer_stat *stats; }; -struct trace_seq { - unsigned char buffer[PAGE_SIZE]; - unsigned int len; - unsigned int readpos; -}; - -static inline void -trace_seq_init(struct trace_seq *s) -{ - s->len = 0; - s->readpos = 0; -} - #define TRACE_PIPE_ALL_CPU -1 -/* - * Trace iterator - used by printout routines who present trace - * results to users and which routines might sleep, etc: - */ -struct trace_iterator { - struct trace_array *tr; - struct tracer *trace; - void *private; - int cpu_file; - struct mutex mutex; - struct ring_buffer_iter *buffer_iter[NR_CPUS]; - - /* The below is zeroed out in pipe_read */ - struct trace_seq seq; - struct trace_entry *ent; - int cpu; - u64 ts; - - unsigned long iter_flags; - loff_t pos; - long idx; - - cpumask_var_t started; -}; - int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); +void tracing_reset_current(int cpu); +void tracing_reset_current_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops); + struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct ring_buffer_event; struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc); @@ -484,14 +432,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr, struct ring_buffer_event *event, unsigned long flags, int pc); -struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, - unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, - unsigned long flags, int pc); - struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -514,7 +454,6 @@ void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc); -void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, @@ -599,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_hw_branches(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); @@ -613,6 +554,8 @@ extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern enum print_line_t print_graph_function(struct trace_iterator *iter); +extern enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ @@ -644,7 +587,6 @@ static inline int ftrace_graph_addr(unsigned long addr) return 1; } #endif /* CONFIG_DYNAMIC_FTRACE */ - #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function(struct trace_iterator *iter) @@ -692,6 +634,7 @@ enum trace_iterator_flags { TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, TRACE_ITER_SLEEP_TIME = 0x100000, + TRACE_ITER_GRAPH_TIME = 0x200000, }; /* @@ -790,103 +733,113 @@ struct ftrace_event_field { char *type; int offset; int size; + int is_signed; }; -struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); - int id; - int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); - int (*define_fields)(void); - struct list_head fields; +struct event_filter { + int n_preds; struct filter_pred **preds; - -#ifdef CONFIG_EVENT_PROFILE - atomic_t profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); -#endif + char *filter_string; }; struct event_subsystem { struct list_head list; const char *name; struct dentry *entry; - struct filter_pred **preds; + void *filter; }; -#define events_for_each(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) - -#define MAX_FILTER_PRED 8 - struct filter_pred; -typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, + int val1, int val2); struct filter_pred { filter_pred_fn_t fn; u64 val; - char *str_val; + char str_val[MAX_FILTER_STR_VAL]; int str_len; char *field_name; int offset; int not; - int or; - int compound; - int clear; + int op; + int pop_n; }; -int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size); -extern void filter_free_pred(struct filter_pred *pred); -extern void filter_print_preds(struct filter_pred **preds, +extern void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s); -extern int filter_parse(char **pbuf, struct filter_pred *pred); -extern int filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred); -extern void filter_free_preds(struct ftrace_event_call *call); -extern int filter_match_preds(struct ftrace_event_call *call, void *rec); -extern void filter_free_subsystem_preds(struct event_subsystem *system); -extern int filter_add_subsystem_pred(struct event_subsystem *system, - struct filter_pred *pred); - -void event_trace_printk(unsigned long ip, const char *fmt, ...); -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; - -#define for_each_event(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) +extern int apply_event_filter(struct ftrace_event_call *call, + char *filter_string); +extern int apply_subsystem_event_filter(struct event_subsystem *system, + char *filter_string); +extern void print_subsystem_event_filter(struct event_subsystem *system, + struct trace_seq *s); + +static inline int +filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { + ring_buffer_discard_commit(buffer, event); + return 1; + } + + return 0; +} + +#define DEFINE_COMPARISON_PRED(type) \ +static int filter_pred_##type(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = 0; \ + \ + switch (pred->op) { \ + case OP_LT: \ + match = (*addr < val); \ + break; \ + case OP_LE: \ + match = (*addr <= val); \ + break; \ + case OP_GT: \ + match = (*addr > val); \ + break; \ + case OP_GE: \ + match = (*addr >= val); \ + break; \ + default: \ + break; \ + } \ + \ + return match; \ +} + +#define DEFINE_EQUALITY_PRED(size) \ +static int filter_pred_##size(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + u##size val = (u##size)pred->val; \ + int match; \ + \ + match = (val == *addr) ^ pred->not; \ + \ + return match; \ +} + +extern struct mutex event_mutex; +extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement optimizing out. - */ -#define event_trace_printk(ip, fmt, args...) \ -do { \ - __trace_printk_check_format(fmt, ##args); \ - tracing_record_cmdline(current); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_bprintk(ip, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(ip, fmt, ##args); \ -} while (0) +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ + extern struct ftrace_event_call event_##call; +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) +#include "trace_event_types.h" #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 7a30fc4c3642..a29ef23ffb47 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -9,6 +9,7 @@ #include <linux/debugfs.h> #include <linux/ftrace.h> #include <linux/kallsyms.h> +#include <linux/time.h> #include "trace.h" #include "trace_output.h" @@ -67,7 +68,7 @@ initcall_call_print_line(struct trace_iterator *iter) trace_assign_type(field, entry); call = &field->boot_call; ts = iter->ts; - nsec_rem = do_div(ts, 1000000000); + nsec_rem = do_div(ts, NSEC_PER_SEC); ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", (unsigned long)ts, nsec_rem, call->func, call->caller); @@ -92,7 +93,7 @@ initcall_ret_print_line(struct trace_iterator *iter) trace_assign_type(field, entry); init_ret = &field->boot_ret; ts = iter->ts; - nsec_rem = do_div(ts, 1000000000); + nsec_rem = do_div(ts, NSEC_PER_SEC); ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " "returned %d after %llu msecs\n", diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 8333715e4066..7a7a9fd249a9 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -30,6 +30,7 @@ static struct trace_array *branch_tracer; static void probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) { + struct ftrace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; struct trace_branch *entry; @@ -73,7 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); @@ -271,7 +273,7 @@ static int branch_stat_show(struct seq_file *m, void *v) return 0; } -static void *annotated_branch_stat_start(void) +static void *annotated_branch_stat_start(struct tracer_stat *trace) { return __start_annotated_branch_profile; } @@ -346,7 +348,7 @@ static int all_branch_stat_headers(struct seq_file *m) return 0; } -static void *all_branch_stat_start(void) +static void *all_branch_stat_start(struct tracer_stat *trace) { return __start_branch_profile; } diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 22cba9970776..5b5895afecfe 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -10,22 +10,30 @@ int ftrace_profile_enable(int event_id) { struct ftrace_event_call *event; + int ret = -EINVAL; - for_each_event(event) { - if (event->id == event_id) - return event->profile_enable(event); + mutex_lock(&event_mutex); + list_for_each_entry(event, &ftrace_events, list) { + if (event->id == event_id) { + ret = event->profile_enable(event); + break; + } } + mutex_unlock(&event_mutex); - return -EINVAL; + return ret; } void ftrace_profile_disable(int event_id) { struct ftrace_event_call *event; - for_each_event(event) { - if (event->id == event_id) - return event->profile_disable(event); + mutex_lock(&event_mutex); + list_for_each_entry(event, &ftrace_events, list) { + if (event->id == event_id) { + event->profile_disable(event); + break; + } } + mutex_unlock(&event_mutex); } - diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index fd78bee71dd7..5e32e375134d 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); -TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, +TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, arg1, arg1) TRACE_FIELD(unsigned long, arg2, arg2) @@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned int, line, line) - TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func) - TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) + TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, + TRACE_FUNC_SIZE+1, func) + TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, + TRACE_FUNC_SIZE+1, file) TRACE_FIELD(char, correct, correct) ), TP_RAW_FMT("%u:%s:%s (%u)") @@ -139,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, TRACE_STRUCT( - TRACE_FIELD(ktime_t, state_data.stamp, stamp) - TRACE_FIELD(ktime_t, state_data.end, end) + TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1) + TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1) TRACE_FIELD(int, state_data.type, type) TRACE_FIELD(int, state_data.state, state) ), diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 576f4fa2af0d..aa08be69a1b6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -8,19 +8,25 @@ * */ +#include <linux/workqueue.h> +#include <linux/spinlock.h> +#include <linux/kthread.h> #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> +#include <linux/delay.h> #include "trace_output.h" #define TRACE_SYSTEM "TRACE_SYSTEM" -static DEFINE_MUTEX(event_mutex); +DEFINE_MUTEX(event_mutex); + +LIST_HEAD(ftrace_events); int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size) + char *name, int offset, int size, int is_signed) { struct ftrace_event_field *field; @@ -38,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type, field->offset = offset; field->size = size; + field->is_signed = is_signed; list_add(&field->link, &call->fields); return 0; @@ -51,47 +58,94 @@ err: return -ENOMEM; } +EXPORT_SYMBOL_GPL(trace_define_field); -static void ftrace_clear_events(void) -{ - struct ftrace_event_call *call = (void *)__start_ftrace_events; - +#ifdef CONFIG_MODULES - while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { +static void trace_destroy_fields(struct ftrace_event_call *call) +{ + struct ftrace_event_field *field, *next; - if (call->enabled) { - call->enabled = 0; - call->unregfunc(); - } - call++; + list_for_each_entry_safe(field, next, &call->fields, link) { + list_del(&field->link); + kfree(field->type); + kfree(field->name); + kfree(field); } } +#endif /* CONFIG_MODULES */ + static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { - switch (enable) { case 0: if (call->enabled) { call->enabled = 0; + tracing_stop_cmdline_record(); call->unregfunc(); } break; case 1: if (!call->enabled) { call->enabled = 1; + tracing_start_cmdline_record(); call->regfunc(); } break; } } +static void ftrace_clear_events(void) +{ + struct ftrace_event_call *call; + + mutex_lock(&event_mutex); + list_for_each_entry(call, &ftrace_events, list) { + ftrace_event_enable_disable(call, 0); + } + mutex_unlock(&event_mutex); +} + +/* + * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. + */ +static int __ftrace_set_clr_event(const char *match, const char *sub, + const char *event, int set) +{ + struct ftrace_event_call *call; + int ret = -EINVAL; + + mutex_lock(&event_mutex); + list_for_each_entry(call, &ftrace_events, list) { + + if (!call->name || !call->regfunc) + continue; + + if (match && + strcmp(match, call->name) != 0 && + strcmp(match, call->system) != 0) + continue; + + if (sub && strcmp(sub, call->system) != 0) + continue; + + if (event && strcmp(event, call->name) != 0) + continue; + + ftrace_event_enable_disable(call, set); + + ret = 0; + } + mutex_unlock(&event_mutex); + + return ret; +} + static int ftrace_set_clr_event(char *buf, int set) { - struct ftrace_event_call *call = __start_ftrace_events; char *event = NULL, *sub = NULL, *match; - int ret = -EINVAL; /* * The buf format can be <subsystem>:<event-name> @@ -117,30 +171,24 @@ static int ftrace_set_clr_event(char *buf, int set) event = NULL; } - mutex_lock(&event_mutex); - for_each_event(call) { - - if (!call->name || !call->regfunc) - continue; - - if (match && - strcmp(match, call->name) != 0 && - strcmp(match, call->system) != 0) - continue; - - if (sub && strcmp(sub, call->system) != 0) - continue; - - if (event && strcmp(event, call->name) != 0) - continue; - - ftrace_event_enable_disable(call, set); - - ret = 0; - } - mutex_unlock(&event_mutex); + return __ftrace_set_clr_event(match, sub, event, set); +} - return ret; +/** + * trace_set_clr_event - enable or disable an event + * @system: system name to match (NULL for any system) + * @event: event name to match (NULL for all events, within system) + * @set: 1 to enable, 0 to disable + * + * This is a way for other parts of the kernel to enable or disable + * event recording. + * + * Returns 0 on success, -EINVAL if the parameters do not match any + * registered events. + */ +int trace_set_clr_event(const char *system, const char *event, int set) +{ + return __ftrace_set_clr_event(NULL, system, event, set); } /* 128 should be much more than enough */ @@ -224,15 +272,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; - struct ftrace_event_call *next = call; + struct list_head *list = m->private; + struct ftrace_event_call *call; (*pos)++; for (;;) { - if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + if (list == &ftrace_events) return NULL; + call = list_entry(list, struct ftrace_event_call, list); + /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. @@ -240,45 +290,51 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if (call->regfunc) break; - call++; - next = call; + list = list->next; } - m->private = ++next; + m->private = list->next; return call; } static void *t_start(struct seq_file *m, loff_t *pos) { + mutex_lock(&event_mutex); + if (*pos == 0) + m->private = ftrace_events.next; return t_next(m, NULL, pos); } static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; - struct ftrace_event_call *next; + struct list_head *list = m->private; + struct ftrace_event_call *call; (*pos)++; retry: - if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + if (list == &ftrace_events) return NULL; + call = list_entry(list, struct ftrace_event_call, list); + if (!call->enabled) { - call++; + list = list->next; goto retry; } - next = call; - m->private = ++next; + m->private = list->next; return call; } static void *s_start(struct seq_file *m, loff_t *pos) { + mutex_lock(&event_mutex); + if (*pos == 0) + m->private = ftrace_events.next; return s_next(m, NULL, pos); } @@ -295,12 +351,12 @@ static int t_show(struct seq_file *m, void *v) static void t_stop(struct seq_file *m, void *p) { + mutex_unlock(&event_mutex); } static int ftrace_event_seq_open(struct inode *inode, struct file *file) { - int ret; const struct seq_operations *seq_ops; if ((file->f_mode & FMODE_WRITE) && @@ -308,13 +364,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file) ftrace_clear_events(); seq_ops = inode->i_private; - ret = seq_open(file, seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - - m->private = __start_ftrace_events; - } - return ret; + return seq_open(file, seq_ops); } static ssize_t @@ -374,8 +424,93 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + const char set_to_char[4] = { '?', '0', '1', 'X' }; + const char *system = filp->private_data; + struct ftrace_event_call *call; + char buf[2]; + int set = 0; + int ret; + + mutex_lock(&event_mutex); + list_for_each_entry(call, &ftrace_events, list) { + if (!call->name || !call->regfunc) + continue; + + if (system && strcmp(call->system, system) != 0) + continue; + + /* + * We need to find out if all the events are set + * or if all events or cleared, or if we have + * a mixture. + */ + set |= (1 << !!call->enabled); + + /* + * If we have a mixture, no need to look further. + */ + if (set == 3) + break; + } + mutex_unlock(&event_mutex); + + buf[0] = set_to_char[set]; + buf[1] = '\n'; + + ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); + + return ret; +} + +static ssize_t +system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + const char *system = filp->private_data; + unsigned long val; + char buf[64]; + ssize_t ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + + if (val != 0 && val != 1) + return -EINVAL; + + ret = __ftrace_set_clr_event(NULL, system, NULL, val); + if (ret) + goto out; + + ret = cnt; + +out: + *ppos += cnt; + + return ret; +} + +extern char *__bad_type_size(void); + #undef FIELD #define FIELD(type, name) \ + sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ sizeof(field.name) @@ -391,7 +526,7 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", - FIELD(unsigned char, type), + FIELD(unsigned short, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), FIELD(int, pid), @@ -481,7 +616,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(call->preds, s); + print_event_filter(call, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -494,38 +629,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_call *call = filp->private_data; - char buf[64], *pbuf = buf; - struct filter_pred *pred; + char *buf; int err; - if (cnt >= sizeof(buf)) + if (cnt >= PAGE_SIZE) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - buf[cnt] = '\0'; - - pred = kzalloc(sizeof(*pred), GFP_KERNEL); - if (!pred) + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return -ENOMEM; - err = filter_parse(&pbuf, pred); - if (err < 0) { - filter_free_pred(pred); - return err; - } - - if (pred->clear) { - filter_free_preds(call); - filter_free_pred(pred); - return cnt; + if (copy_from_user(buf, ubuf, cnt)) { + free_page((unsigned long) buf); + return -EFAULT; } + buf[cnt] = '\0'; - err = filter_add_pred(call, pred); - if (err < 0) { - filter_free_pred(pred); + err = apply_event_filter(call, buf); + free_page((unsigned long) buf); + if (err < 0) return err; - } *ppos += cnt; @@ -549,7 +672,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(system->preds, s); + print_subsystem_event_filter(system, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -562,45 +685,56 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct event_subsystem *system = filp->private_data; - char buf[64], *pbuf = buf; - struct filter_pred *pred; + char *buf; int err; - if (cnt >= sizeof(buf)) + if (cnt >= PAGE_SIZE) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - buf[cnt] = '\0'; - - pred = kzalloc(sizeof(*pred), GFP_KERNEL); - if (!pred) + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return -ENOMEM; - err = filter_parse(&pbuf, pred); - if (err < 0) { - filter_free_pred(pred); - return err; - } - - if (pred->clear) { - filter_free_subsystem_preds(system); - filter_free_pred(pred); - return cnt; + if (copy_from_user(buf, ubuf, cnt)) { + free_page((unsigned long) buf); + return -EFAULT; } + buf[cnt] = '\0'; - err = filter_add_subsystem_pred(system, pred); - if (err < 0) { - filter_free_subsystem_preds(system); - filter_free_pred(pred); + err = apply_subsystem_event_filter(system, buf); + free_page((unsigned long) buf); + if (err < 0) return err; - } *ppos += cnt; return cnt; } +static ssize_t +show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + int (*func)(struct trace_seq *s) = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + func(s); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + + kfree(s); + + return r; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -658,6 +792,17 @@ static const struct file_operations ftrace_subsystem_filter_fops = { .write = subsystem_filter_write, }; +static const struct file_operations ftrace_system_enable_fops = { + .open = tracing_open_generic, + .read = system_enable_read, + .write = system_enable_write, +}; + +static const struct file_operations ftrace_show_header_fops = { + .open = tracing_open_generic, + .read = show_header, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -684,6 +829,7 @@ static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { struct event_subsystem *system; + struct dentry *entry; /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { @@ -707,16 +853,46 @@ event_subsystem_dir(const char *name, struct dentry *d_events) return d_events; } - system->name = name; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) { + debugfs_remove(system->entry); + kfree(system); + return d_events; + } + list_add(&system->list, &event_subsystems); - system->preds = NULL; + system->filter = NULL; + + system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); + if (!system->filter) { + pr_warning("Could not allocate filter for subsystem " + "'%s'\n", name); + return system->entry; + } + + entry = debugfs_create_file("filter", 0644, system->entry, system, + &ftrace_subsystem_filter_fops); + if (!entry) { + kfree(system->filter); + system->filter = NULL; + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", name); + } + + entry = trace_create_file("enable", 0644, system->entry, + (void *)system->name, + &ftrace_system_enable_fops); return system->entry; } static int -event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) +event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, + const struct file_operations *id, + const struct file_operations *enable, + const struct file_operations *filter, + const struct file_operations *format) { struct dentry *entry; int ret; @@ -725,7 +901,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) * If the trace point header did not define TRACE_SYSTEM * then the system would be called "TRACE_SYSTEM". */ - if (strcmp(call->system, "TRACE_SYSTEM") != 0) + if (strcmp(call->system, TRACE_SYSTEM) != 0) d_events = event_subsystem_dir(call->system, d_events); if (call->raw_init) { @@ -744,21 +920,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return -1; } - if (call->regfunc) { - entry = debugfs_create_file("enable", 0644, call->dir, call, - &ftrace_enable_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/enable' entry\n", call->name); - } + if (call->regfunc) + entry = trace_create_file("enable", 0644, call->dir, call, + enable); - if (call->id) { - entry = debugfs_create_file("id", 0444, call->dir, call, - &ftrace_event_id_fops); - if (!entry) - pr_warning("Could not create debugfs '%s/id' entry\n", - call->name); - } + if (call->id) + entry = trace_create_file("id", 0444, call->dir, call, + id); if (call->define_fields) { ret = call->define_fields(); @@ -767,32 +935,195 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) " events/%s\n", call->name); return ret; } - entry = debugfs_create_file("filter", 0644, call->dir, call, - &ftrace_event_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/filter' entry\n", call->name); + entry = trace_create_file("filter", 0644, call->dir, call, + filter); } /* A trace may not want to export its format */ if (!call->show_format) return 0; - entry = debugfs_create_file("format", 0444, call->dir, call, - &ftrace_event_format_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/format' entry\n", call->name); + entry = trace_create_file("format", 0444, call->dir, call, + format); + + return 0; +} + +#define for_each_event(event, start, end) \ + for (event = start; \ + (unsigned long)event < (unsigned long)end; \ + event++) + +#ifdef CONFIG_MODULES + +static LIST_HEAD(ftrace_module_file_list); + +/* + * Modules must own their file_operations to keep up with + * reference counting. + */ +struct ftrace_module_file_ops { + struct list_head list; + struct module *mod; + struct file_operations id; + struct file_operations enable; + struct file_operations format; + struct file_operations filter; +}; + +static struct ftrace_module_file_ops * +trace_create_file_ops(struct module *mod) +{ + struct ftrace_module_file_ops *file_ops; + + /* + * This is a bit of a PITA. To allow for correct reference + * counting, modules must "own" their file_operations. + * To do this, we allocate the file operations that will be + * used in the event directory. + */ + + file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL); + if (!file_ops) + return NULL; + + file_ops->mod = mod; + + file_ops->id = ftrace_event_id_fops; + file_ops->id.owner = mod; + + file_ops->enable = ftrace_enable_fops; + file_ops->enable.owner = mod; + + file_ops->filter = ftrace_event_filter_fops; + file_ops->filter.owner = mod; + + file_ops->format = ftrace_event_format_fops; + file_ops->format.owner = mod; + + list_add(&file_ops->list, &ftrace_module_file_list); + + return file_ops; +} + +static void trace_module_add_events(struct module *mod) +{ + struct ftrace_module_file_ops *file_ops = NULL; + struct ftrace_event_call *call, *start, *end; + struct dentry *d_events; + + start = mod->trace_events; + end = mod->trace_events + mod->num_trace_events; + + if (start == end) + return; + + d_events = event_trace_events_dir(); + if (!d_events) + return; + + for_each_event(call, start, end) { + /* The linker may leave blanks */ + if (!call->name) + continue; + + /* + * This module has events, create file ops for this module + * if not already done. + */ + if (!file_ops) { + file_ops = trace_create_file_ops(mod); + if (!file_ops) + return; + } + call->mod = mod; + list_add(&call->list, &ftrace_events); + event_create_dir(call, d_events, + &file_ops->id, &file_ops->enable, + &file_ops->filter, &file_ops->format); + } +} + +static void trace_module_remove_events(struct module *mod) +{ + struct ftrace_module_file_ops *file_ops; + struct ftrace_event_call *call, *p; + bool found = false; + + down_write(&trace_event_mutex); + list_for_each_entry_safe(call, p, &ftrace_events, list) { + if (call->mod == mod) { + found = true; + ftrace_event_enable_disable(call, 0); + if (call->event) + __unregister_ftrace_event(call->event); + debugfs_remove_recursive(call->dir); + list_del(&call->list); + trace_destroy_fields(call); + destroy_preds(call); + } + } + + /* Now free the file_operations */ + list_for_each_entry(file_ops, &ftrace_module_file_list, list) { + if (file_ops->mod == mod) + break; + } + if (&file_ops->list != &ftrace_module_file_list) { + list_del(&file_ops->list); + kfree(file_ops); + } + + /* + * It is safest to reset the ring buffer if the module being unloaded + * registered any events. + */ + if (found) + tracing_reset_current_online_cpus(); + up_write(&trace_event_mutex); +} + +static int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + mutex_lock(&event_mutex); + switch (val) { + case MODULE_STATE_COMING: + trace_module_add_events(mod); + break; + case MODULE_STATE_GOING: + trace_module_remove_events(mod); + break; + } + mutex_unlock(&event_mutex); return 0; } +#else +static int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +struct notifier_block trace_module_nb = { + .notifier_call = trace_module_notify, + .priority = 0, +}; + +extern struct ftrace_event_call __start_ftrace_events[]; +extern struct ftrace_event_call __stop_ftrace_events[]; static __init int event_trace_init(void) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; + int ret; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -816,13 +1147,243 @@ static __init int event_trace_init(void) if (!d_events) return 0; - for_each_event(call) { + /* ring buffer internal formats */ + trace_create_file("header_page", 0444, d_events, + ring_buffer_print_page_header, + &ftrace_show_header_fops); + + trace_create_file("header_event", 0444, d_events, + ring_buffer_print_entry_header, + &ftrace_show_header_fops); + + trace_create_file("enable", 0644, d_events, + NULL, &ftrace_system_enable_fops); + + for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { /* The linker may leave blanks */ if (!call->name) continue; - event_create_dir(call, d_events); + list_add(&call->list, &ftrace_events); + event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, &ftrace_event_filter_fops, + &ftrace_event_format_fops); } + ret = register_module_notifier(&trace_module_nb); + if (ret) + pr_warning("Failed to register trace events module notifier\n"); + return 0; } fs_initcall(event_trace_init); + +#ifdef CONFIG_FTRACE_STARTUP_TEST + +static DEFINE_SPINLOCK(test_spinlock); +static DEFINE_SPINLOCK(test_spinlock_irq); +static DEFINE_MUTEX(test_mutex); + +static __init void test_work(struct work_struct *dummy) +{ + spin_lock(&test_spinlock); + spin_lock_irq(&test_spinlock_irq); + udelay(1); + spin_unlock_irq(&test_spinlock_irq); + spin_unlock(&test_spinlock); + + mutex_lock(&test_mutex); + msleep(1); + mutex_unlock(&test_mutex); +} + +static __init int event_test_thread(void *unused) +{ + void *test_malloc; + + test_malloc = kmalloc(1234, GFP_KERNEL); + if (!test_malloc) + pr_info("failed to kmalloc\n"); + + schedule_on_each_cpu(test_work); + + kfree(test_malloc); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) + schedule(); + + return 0; +} + +/* + * Do various things that may trigger events. + */ +static __init void event_test_stuff(void) +{ + struct task_struct *test_thread; + + test_thread = kthread_run(event_test_thread, NULL, "test-events"); + msleep(1); + kthread_stop(test_thread); +} + +/* + * For every trace event defined, we will test each trace point separately, + * and then by groups, and finally all trace points. + */ +static __init void event_trace_self_tests(void) +{ + struct ftrace_event_call *call; + struct event_subsystem *system; + int ret; + + pr_info("Running tests on trace events:\n"); + + list_for_each_entry(call, &ftrace_events, list) { + + /* Only test those that have a regfunc */ + if (!call->regfunc) + continue; + + pr_info("Testing event %s: ", call->name); + + /* + * If an event is already enabled, someone is using + * it and the self test should not be on. + */ + if (call->enabled) { + pr_warning("Enabled event during self test!\n"); + WARN_ON_ONCE(1); + continue; + } + + ftrace_event_enable_disable(call, 1); + event_test_stuff(); + ftrace_event_enable_disable(call, 0); + + pr_cont("OK\n"); + } + + /* Now test at the sub system level */ + + pr_info("Running tests on trace event systems:\n"); + + list_for_each_entry(system, &event_subsystems, list) { + + /* the ftrace system is special, skip it */ + if (strcmp(system->name, "ftrace") == 0) + continue; + + pr_info("Testing event system %s: ", system->name); + + ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1); + if (WARN_ON_ONCE(ret)) { + pr_warning("error enabling system %s\n", + system->name); + continue; + } + + event_test_stuff(); + + ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0); + if (WARN_ON_ONCE(ret)) + pr_warning("error disabling system %s\n", + system->name); + + pr_cont("OK\n"); + } + + /* Test with all events enabled */ + + pr_info("Running tests on all trace events:\n"); + pr_info("Testing all events: "); + + ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1); + if (WARN_ON_ONCE(ret)) { + pr_warning("error enabling all events\n"); + return; + } + + event_test_stuff(); + + /* reset sysname */ + ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0); + if (WARN_ON_ONCE(ret)) { + pr_warning("error disabling all events\n"); + return; + } + + pr_cont("OK\n"); +} + +#ifdef CONFIG_FUNCTION_TRACER + +static DEFINE_PER_CPU(atomic_t, test_event_disable); + +static void +function_test_events_call(unsigned long ip, unsigned long parent_ip) +{ + struct ring_buffer_event *event; + struct ftrace_entry *entry; + unsigned long flags; + long disabled; + int resched; + int cpu; + int pc; + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); + + if (disabled != 1) + goto out; + + local_save_flags(flags); + + event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), + flags, pc); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->parent_ip = parent_ip; + + trace_nowake_buffer_unlock_commit(event, flags, pc); + + out: + atomic_dec(&per_cpu(test_event_disable, cpu)); + ftrace_preempt_enable(resched); +} + +static struct ftrace_ops trace_ops __initdata = +{ + .func = function_test_events_call, +}; + +static __init void event_trace_self_test_with_function(void) +{ + register_ftrace_function(&trace_ops); + pr_info("Running tests again, along with the function tracer\n"); + event_trace_self_tests(); + unregister_ftrace_function(&trace_ops); +} +#else +static __init void event_trace_self_test_with_function(void) +{ +} +#endif + +static __init int event_trace_self_tests_init(void) +{ + + event_trace_self_tests(); + + event_trace_self_test_with_function(); + + return 0; +} + +late_initcall(event_trace_self_tests_init); + +#endif diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e03cbf1e38f3..db6e54bdb596 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -22,119 +22,297 @@ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> +#include <linux/mutex.h> #include "trace.h" #include "trace_output.h" -static int filter_pred_64(struct filter_pred *pred, void *event) +static DEFINE_MUTEX(filter_mutex); + +enum filter_op_ids +{ + OP_OR, + OP_AND, + OP_NE, + OP_EQ, + OP_LT, + OP_LE, + OP_GT, + OP_GE, + OP_NONE, + OP_OPEN_PAREN, +}; + +struct filter_op { + int id; + char *string; + int precedence; +}; + +static struct filter_op filter_ops[] = { + { OP_OR, "||", 1 }, + { OP_AND, "&&", 2 }, + { OP_NE, "!=", 4 }, + { OP_EQ, "==", 4 }, + { OP_LT, "<", 5 }, + { OP_LE, "<=", 5 }, + { OP_GT, ">", 5 }, + { OP_GE, ">=", 5 }, + { OP_NONE, "OP_NONE", 0 }, + { OP_OPEN_PAREN, "(", 0 }, +}; + +enum { + FILT_ERR_NONE, + FILT_ERR_INVALID_OP, + FILT_ERR_UNBALANCED_PAREN, + FILT_ERR_TOO_MANY_OPERANDS, + FILT_ERR_OPERAND_TOO_LONG, + FILT_ERR_FIELD_NOT_FOUND, + FILT_ERR_ILLEGAL_FIELD_OP, + FILT_ERR_ILLEGAL_INTVAL, + FILT_ERR_BAD_SUBSYS_FILTER, + FILT_ERR_TOO_MANY_PREDS, + FILT_ERR_MISSING_FIELD, + FILT_ERR_INVALID_FILTER, +}; + +static char *err_text[] = { + "No error", + "Invalid operator", + "Unbalanced parens", + "Too many operands", + "Operand too long", + "Field not found", + "Illegal operation for field type", + "Illegal integer value", + "Couldn't find or set field in one of a subsystem's events", + "Too many terms in predicate expression", + "Missing field name and/or value", + "Meaningless filter expression", +}; + +struct opstack_op { + int op; + struct list_head list; +}; + +struct postfix_elt { + int op; + char *operand; + struct list_head list; +}; + +struct filter_parse_state { + struct filter_op *ops; + struct list_head opstack; + struct list_head postfix; + int lasterr; + int lasterr_pos; + + struct { + char *string; + unsigned int cnt; + unsigned int tail; + } infix; + + struct { + char string[MAX_FILTER_STR_VAL]; + int pos; + unsigned int tail; + } operand; +}; + +DEFINE_COMPARISON_PRED(s64); +DEFINE_COMPARISON_PRED(u64); +DEFINE_COMPARISON_PRED(s32); +DEFINE_COMPARISON_PRED(u32); +DEFINE_COMPARISON_PRED(s16); +DEFINE_COMPARISON_PRED(u16); +DEFINE_COMPARISON_PRED(s8); +DEFINE_COMPARISON_PRED(u8); + +DEFINE_EQUALITY_PRED(64); +DEFINE_EQUALITY_PRED(32); +DEFINE_EQUALITY_PRED(16); +DEFINE_EQUALITY_PRED(8); + +static int filter_pred_and(struct filter_pred *pred __attribute((unused)), + void *event __attribute((unused)), + int val1, int val2) +{ + return val1 && val2; +} + +static int filter_pred_or(struct filter_pred *pred __attribute((unused)), + void *event __attribute((unused)), + int val1, int val2) +{ + return val1 || val2; +} + +/* Filter predicate for fixed sized arrays of characters */ +static int filter_pred_string(struct filter_pred *pred, void *event, + int val1, int val2) { - u64 *addr = (u64 *)(event + pred->offset); - u64 val = (u64)pred->val; - int match; + char *addr = (char *)(event + pred->offset); + int cmp, match; + + cmp = strncmp(addr, pred->str_val, pred->str_len); - match = (val == *addr) ^ pred->not; + match = (!cmp) ^ pred->not; return match; } -static int filter_pred_32(struct filter_pred *pred, void *event) +/* + * Filter predicate for dynamic sized arrays of characters. + * These are implemented through a list of strings at the end + * of the entry. + * Also each of these strings have a field in the entry which + * contains its offset from the beginning of the entry. + * We have then first to get this field, dereference it + * and add it to the address of the entry, and at last we have + * the address of the string. + */ +static int filter_pred_strloc(struct filter_pred *pred, void *event, + int val1, int val2) { - u32 *addr = (u32 *)(event + pred->offset); - u32 val = (u32)pred->val; - int match; + int str_loc = *(int *)(event + pred->offset); + char *addr = (char *)(event + str_loc); + int cmp, match; + + cmp = strncmp(addr, pred->str_val, pred->str_len); - match = (val == *addr) ^ pred->not; + match = (!cmp) ^ pred->not; return match; } -static int filter_pred_16(struct filter_pred *pred, void *event) +static int filter_pred_none(struct filter_pred *pred, void *event, + int val1, int val2) +{ + return 0; +} + +/* return 1 if event matches, 0 otherwise (discard) */ +int filter_match_preds(struct ftrace_event_call *call, void *rec) { - u16 *addr = (u16 *)(event + pred->offset); - u16 val = (u16)pred->val; - int match; + struct event_filter *filter = call->filter; + int match, top = 0, val1 = 0, val2 = 0; + int stack[MAX_FILTER_PRED]; + struct filter_pred *pred; + int i; + + for (i = 0; i < filter->n_preds; i++) { + pred = filter->preds[i]; + if (!pred->pop_n) { + match = pred->fn(pred, rec, val1, val2); + stack[top++] = match; + continue; + } + if (pred->pop_n > top) { + WARN_ON_ONCE(1); + return 0; + } + val1 = stack[--top]; + val2 = stack[--top]; + match = pred->fn(pred, rec, val1, val2); + stack[top++] = match; + } - match = (val == *addr) ^ pred->not; + return stack[--top]; +} +EXPORT_SYMBOL_GPL(filter_match_preds); - return match; +static void parse_error(struct filter_parse_state *ps, int err, int pos) +{ + ps->lasterr = err; + ps->lasterr_pos = pos; } -static int filter_pred_8(struct filter_pred *pred, void *event) +static void remove_filter_string(struct event_filter *filter) { - u8 *addr = (u8 *)(event + pred->offset); - u8 val = (u8)pred->val; - int match; + kfree(filter->filter_string); + filter->filter_string = NULL; +} - match = (val == *addr) ^ pred->not; +static int replace_filter_string(struct event_filter *filter, + char *filter_string) +{ + kfree(filter->filter_string); + filter->filter_string = kstrdup(filter_string, GFP_KERNEL); + if (!filter->filter_string) + return -ENOMEM; - return match; + return 0; } -static int filter_pred_string(struct filter_pred *pred, void *event) +static int append_filter_string(struct event_filter *filter, + char *string) { - char *addr = (char *)(event + pred->offset); - int cmp, match; + int newlen; + char *new_filter_string; - cmp = strncmp(addr, pred->str_val, pred->str_len); + BUG_ON(!filter->filter_string); + newlen = strlen(filter->filter_string) + strlen(string) + 1; + new_filter_string = kmalloc(newlen, GFP_KERNEL); + if (!new_filter_string) + return -ENOMEM; - match = (!cmp) ^ pred->not; + strcpy(new_filter_string, filter->filter_string); + strcat(new_filter_string, string); + kfree(filter->filter_string); + filter->filter_string = new_filter_string; - return match; + return 0; } -/* return 1 if event matches, 0 otherwise (discard) */ -int filter_match_preds(struct ftrace_event_call *call, void *rec) +static void append_filter_err(struct filter_parse_state *ps, + struct event_filter *filter) { - int i, matched, and_failed = 0; - struct filter_pred *pred; + int pos = ps->lasterr_pos; + char *buf, *pbuf; - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (call->preds[i]) { - pred = call->preds[i]; - if (and_failed && !pred->or) - continue; - matched = pred->fn(pred, rec); - if (!matched && !pred->or) { - and_failed = 1; - continue; - } else if (matched && pred->or) - return 1; - } else - break; - } + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) + return; - if (and_failed) - return 0; + append_filter_string(filter, "\n"); + memset(buf, ' ', PAGE_SIZE); + if (pos > PAGE_SIZE - 128) + pos = 0; + buf[pos] = '^'; + pbuf = &buf[pos] + 1; - return 1; + sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]); + append_filter_string(filter, buf); + free_page((unsigned long) buf); } -void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) +void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) { - char *field_name; - struct filter_pred *pred; - int i; + struct event_filter *filter = call->filter; - if (!preds) { + mutex_lock(&filter_mutex); + if (filter->filter_string) + trace_seq_printf(s, "%s\n", filter->filter_string); + else trace_seq_printf(s, "none\n"); - return; - } + mutex_unlock(&filter_mutex); +} - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (preds[i]) { - pred = preds[i]; - field_name = pred->field_name; - if (i) - trace_seq_printf(s, pred->or ? "|| " : "&& "); - trace_seq_printf(s, "%s ", field_name); - trace_seq_printf(s, pred->not ? "!= " : "== "); - if (pred->str_val) - trace_seq_printf(s, "%s\n", pred->str_val); - else - trace_seq_printf(s, "%llu\n", pred->val); - } else - break; - } +void print_subsystem_event_filter(struct event_subsystem *system, + struct trace_seq *s) +{ + struct event_filter *filter = system->filter; + + mutex_lock(&filter_mutex); + if (filter->filter_string) + trace_seq_printf(s, "%s\n", filter->filter_string); + else + trace_seq_printf(s, "none\n"); + mutex_unlock(&filter_mutex); } static struct ftrace_event_field * @@ -150,284 +328,828 @@ find_event_field(struct ftrace_event_call *call, char *name) return NULL; } -void filter_free_pred(struct filter_pred *pred) +static void filter_free_pred(struct filter_pred *pred) { if (!pred) return; kfree(pred->field_name); - kfree(pred->str_val); kfree(pred); } -void filter_free_preds(struct ftrace_event_call *call) +static void filter_clear_pred(struct filter_pred *pred) { - int i; + kfree(pred->field_name); + pred->field_name = NULL; + pred->str_len = 0; +} - if (call->preds) { - for (i = 0; i < MAX_FILTER_PRED; i++) - filter_free_pred(call->preds[i]); - kfree(call->preds); - call->preds = NULL; +static int filter_set_pred(struct filter_pred *dest, + struct filter_pred *src, + filter_pred_fn_t fn) +{ + *dest = *src; + if (src->field_name) { + dest->field_name = kstrdup(src->field_name, GFP_KERNEL); + if (!dest->field_name) + return -ENOMEM; } + dest->fn = fn; + + return 0; } -void filter_free_subsystem_preds(struct event_subsystem *system) +static void filter_disable_preds(struct ftrace_event_call *call) { - struct ftrace_event_call *call = __start_ftrace_events; + struct event_filter *filter = call->filter; int i; - if (system->preds) { - for (i = 0; i < MAX_FILTER_PRED; i++) - filter_free_pred(system->preds[i]); - kfree(system->preds); - system->preds = NULL; - } + call->filter_active = 0; + filter->n_preds = 0; - events_for_each(call) { - if (!call->name || !call->regfunc) - continue; + for (i = 0; i < MAX_FILTER_PRED; i++) + filter->preds[i]->fn = filter_pred_none; +} + +void destroy_preds(struct ftrace_event_call *call) +{ + struct event_filter *filter = call->filter; + int i; - if (!strcmp(call->system, system->name)) - filter_free_preds(call); + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (filter->preds[i]) + filter_free_pred(filter->preds[i]); } + kfree(filter->preds); + kfree(filter); + call->filter = NULL; } -static int __filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred) +int init_preds(struct ftrace_event_call *call) { + struct event_filter *filter; + struct filter_pred *pred; int i; - if (call->preds && !pred->compound) - filter_free_preds(call); + filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!call->filter) + return -ENOMEM; - if (!call->preds) { - call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), - GFP_KERNEL); - if (!call->preds) - return -ENOMEM; - } + call->filter_active = 0; + filter->n_preds = 0; + + filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); + if (!filter->preds) + goto oom; for (i = 0; i < MAX_FILTER_PRED; i++) { - if (!call->preds[i]) { - call->preds[i] = pred; - return 0; + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + goto oom; + pred->fn = filter_pred_none; + filter->preds[i] = pred; + } + + return 0; + +oom: + destroy_preds(call); + + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(init_preds); + +static void filter_free_subsystem_preds(struct event_subsystem *system) +{ + struct event_filter *filter = system->filter; + struct ftrace_event_call *call; + int i; + + if (filter->n_preds) { + for (i = 0; i < filter->n_preds; i++) + filter_free_pred(filter->preds[i]); + kfree(filter->preds); + filter->preds = NULL; + filter->n_preds = 0; + } + + mutex_lock(&event_mutex); + list_for_each_entry(call, &ftrace_events, list) { + if (!call->define_fields) + continue; + + if (!strcmp(call->system, system->name)) { + filter_disable_preds(call); + remove_filter_string(call->filter); } } + mutex_unlock(&event_mutex); +} + +static int filter_add_pred_fn(struct filter_parse_state *ps, + struct ftrace_event_call *call, + struct filter_pred *pred, + filter_pred_fn_t fn) +{ + struct event_filter *filter = call->filter; + int idx, err; + + if (filter->n_preds == MAX_FILTER_PRED) { + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); + return -ENOSPC; + } + + idx = filter->n_preds; + filter_clear_pred(filter->preds[idx]); + err = filter_set_pred(filter->preds[idx], pred, fn); + if (err) + return err; - return -ENOSPC; + filter->n_preds++; + call->filter_active = 1; + + return 0; } +enum { + FILTER_STATIC_STRING = 1, + FILTER_DYN_STRING +}; + static int is_string_field(const char *type) { + if (strstr(type, "__data_loc") && strstr(type, "char")) + return FILTER_DYN_STRING; + if (strchr(type, '[') && strstr(type, "char")) - return 1; + return FILTER_STATIC_STRING; return 0; } -int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) +static int is_legal_op(struct ftrace_event_field *field, int op) { - struct ftrace_event_field *field; - - field = find_event_field(call, pred->field_name); - if (!field) - return -EINVAL; + if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) + return 0; - pred->offset = field->offset; + return 1; +} - if (is_string_field(field->type)) { - if (!pred->str_val) - return -EINVAL; - pred->fn = filter_pred_string; - pred->str_len = field->size; - return __filter_add_pred(call, pred); - } else { - if (pred->str_val) - return -EINVAL; - } +static filter_pred_fn_t select_comparison_fn(int op, int field_size, + int field_is_signed) +{ + filter_pred_fn_t fn = NULL; - switch (field->size) { + switch (field_size) { case 8: - pred->fn = filter_pred_64; + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_64; + else if (field_is_signed) + fn = filter_pred_s64; + else + fn = filter_pred_u64; break; case 4: - pred->fn = filter_pred_32; + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_32; + else if (field_is_signed) + fn = filter_pred_s32; + else + fn = filter_pred_u32; break; case 2: - pred->fn = filter_pred_16; + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_16; + else if (field_is_signed) + fn = filter_pred_s16; + else + fn = filter_pred_u16; break; case 1: - pred->fn = filter_pred_8; + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_8; + else if (field_is_signed) + fn = filter_pred_s8; + else + fn = filter_pred_u8; break; - default: - return -EINVAL; } - return __filter_add_pred(call, pred); + return fn; } -static struct filter_pred *copy_pred(struct filter_pred *pred) +static int filter_add_pred(struct filter_parse_state *ps, + struct ftrace_event_call *call, + struct filter_pred *pred) { - struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); - if (!new_pred) - return NULL; + struct ftrace_event_field *field; + filter_pred_fn_t fn; + unsigned long long val; + int string_type; + + pred->fn = filter_pred_none; + + if (pred->op == OP_AND) { + pred->pop_n = 2; + return filter_add_pred_fn(ps, call, pred, filter_pred_and); + } else if (pred->op == OP_OR) { + pred->pop_n = 2; + return filter_add_pred_fn(ps, call, pred, filter_pred_or); + } + + field = find_event_field(call, pred->field_name); + if (!field) { + parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); + return -EINVAL; + } - memcpy(new_pred, pred, sizeof(*pred)); + pred->offset = field->offset; - if (pred->field_name) { - new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); - if (!new_pred->field_name) { - kfree(new_pred); - return NULL; - } + if (!is_legal_op(field, pred->op)) { + parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0); + return -EINVAL; } - if (pred->str_val) { - new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); - if (!new_pred->str_val) { - filter_free_pred(new_pred); - return NULL; + string_type = is_string_field(field->type); + if (string_type) { + if (string_type == FILTER_STATIC_STRING) + fn = filter_pred_string; + else + fn = filter_pred_strloc; + pred->str_len = field->size; + if (pred->op == OP_NE) + pred->not = 1; + return filter_add_pred_fn(ps, call, pred, fn); + } else { + if (strict_strtoull(pred->str_val, 0, &val)) { + parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); + return -EINVAL; } + pred->val = val; + } + + fn = select_comparison_fn(pred->op, field->size, field->is_signed); + if (!fn) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); + return -EINVAL; } - return new_pred; + if (pred->op == OP_NE) + pred->not = 1; + + return filter_add_pred_fn(ps, call, pred, fn); } -int filter_add_subsystem_pred(struct event_subsystem *system, - struct filter_pred *pred) +static int filter_add_subsystem_pred(struct filter_parse_state *ps, + struct event_subsystem *system, + struct filter_pred *pred, + char *filter_string) { - struct ftrace_event_call *call = __start_ftrace_events; - struct filter_pred *event_pred; - int i; - - if (system->preds && !pred->compound) - filter_free_subsystem_preds(system); + struct event_filter *filter = system->filter; + struct ftrace_event_call *call; + int err = 0; - if (!system->preds) { - system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), + if (!filter->preds) { + filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!system->preds) + + if (!filter->preds) return -ENOMEM; } - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (!system->preds[i]) { - system->preds[i] = pred; - break; - } + if (filter->n_preds == MAX_FILTER_PRED) { + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); + return -ENOSPC; } - if (i == MAX_FILTER_PRED) - return -ENOSPC; + filter->preds[filter->n_preds] = pred; + filter->n_preds++; - events_for_each(call) { - int err; + mutex_lock(&event_mutex); + list_for_each_entry(call, &ftrace_events, list) { - if (!call->name || !call->regfunc) + if (!call->define_fields) continue; if (strcmp(call->system, system->name)) continue; - if (!find_event_field(call, pred->field_name)) - continue; + err = filter_add_pred(ps, call, pred); + if (err) { + mutex_unlock(&event_mutex); + filter_free_subsystem_preds(system); + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + goto out; + } + replace_filter_string(call->filter, filter_string); + } + mutex_unlock(&event_mutex); +out: + return err; +} - event_pred = copy_pred(pred); - if (!event_pred) - goto oom; +static void parse_init(struct filter_parse_state *ps, + struct filter_op *ops, + char *infix_string) +{ + memset(ps, '\0', sizeof(*ps)); - err = filter_add_pred(call, event_pred); - if (err) - filter_free_pred(event_pred); - if (err == -ENOMEM) - goto oom; + ps->infix.string = infix_string; + ps->infix.cnt = strlen(infix_string); + ps->ops = ops; + + INIT_LIST_HEAD(&ps->opstack); + INIT_LIST_HEAD(&ps->postfix); +} + +static char infix_next(struct filter_parse_state *ps) +{ + ps->infix.cnt--; + + return ps->infix.string[ps->infix.tail++]; +} + +static char infix_peek(struct filter_parse_state *ps) +{ + if (ps->infix.tail == strlen(ps->infix.string)) + return 0; + + return ps->infix.string[ps->infix.tail]; +} + +static void infix_advance(struct filter_parse_state *ps) +{ + ps->infix.cnt--; + ps->infix.tail++; +} + +static inline int is_precedence_lower(struct filter_parse_state *ps, + int a, int b) +{ + return ps->ops[a].precedence < ps->ops[b].precedence; +} + +static inline int is_op_char(struct filter_parse_state *ps, char c) +{ + int i; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (ps->ops[i].string[0] == c) + return 1; } return 0; +} -oom: - system->preds[i] = NULL; - return -ENOMEM; +static int infix_get_op(struct filter_parse_state *ps, char firstc) +{ + char nextc = infix_peek(ps); + char opstr[3]; + int i; + + opstr[0] = firstc; + opstr[1] = nextc; + opstr[2] = '\0'; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (!strcmp(opstr, ps->ops[i].string)) { + infix_advance(ps); + return ps->ops[i].id; + } + } + + opstr[1] = '\0'; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (!strcmp(opstr, ps->ops[i].string)) + return ps->ops[i].id; + } + + return OP_NONE; } -int filter_parse(char **pbuf, struct filter_pred *pred) +static inline void clear_operand_string(struct filter_parse_state *ps) { - char *tmp, *tok, *val_str = NULL; - int tok_n = 0; + memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL); + ps->operand.tail = 0; +} - /* field ==/!= number, or/and field ==/!= number, number */ - while ((tok = strsep(pbuf, " \n"))) { - if (tok_n == 0) { - if (!strcmp(tok, "0")) { - pred->clear = 1; - return 0; - } else if (!strcmp(tok, "&&")) { - pred->or = 0; - pred->compound = 1; - } else if (!strcmp(tok, "||")) { - pred->or = 1; - pred->compound = 1; - } else - pred->field_name = tok; - tok_n = 1; +static inline int append_operand_char(struct filter_parse_state *ps, char c) +{ + if (ps->operand.tail == MAX_FILTER_STR_VAL - 1) + return -EINVAL; + + ps->operand.string[ps->operand.tail++] = c; + + return 0; +} + +static int filter_opstack_push(struct filter_parse_state *ps, int op) +{ + struct opstack_op *opstack_op; + + opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL); + if (!opstack_op) + return -ENOMEM; + + opstack_op->op = op; + list_add(&opstack_op->list, &ps->opstack); + + return 0; +} + +static int filter_opstack_empty(struct filter_parse_state *ps) +{ + return list_empty(&ps->opstack); +} + +static int filter_opstack_top(struct filter_parse_state *ps) +{ + struct opstack_op *opstack_op; + + if (filter_opstack_empty(ps)) + return OP_NONE; + + opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); + + return opstack_op->op; +} + +static int filter_opstack_pop(struct filter_parse_state *ps) +{ + struct opstack_op *opstack_op; + int op; + + if (filter_opstack_empty(ps)) + return OP_NONE; + + opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); + op = opstack_op->op; + list_del(&opstack_op->list); + + kfree(opstack_op); + + return op; +} + +static void filter_opstack_clear(struct filter_parse_state *ps) +{ + while (!filter_opstack_empty(ps)) + filter_opstack_pop(ps); +} + +static char *curr_operand(struct filter_parse_state *ps) +{ + return ps->operand.string; +} + +static int postfix_append_operand(struct filter_parse_state *ps, char *operand) +{ + struct postfix_elt *elt; + + elt = kmalloc(sizeof(*elt), GFP_KERNEL); + if (!elt) + return -ENOMEM; + + elt->op = OP_NONE; + elt->operand = kstrdup(operand, GFP_KERNEL); + if (!elt->operand) { + kfree(elt); + return -ENOMEM; + } + + list_add_tail(&elt->list, &ps->postfix); + + return 0; +} + +static int postfix_append_op(struct filter_parse_state *ps, int op) +{ + struct postfix_elt *elt; + + elt = kmalloc(sizeof(*elt), GFP_KERNEL); + if (!elt) + return -ENOMEM; + + elt->op = op; + elt->operand = NULL; + + list_add_tail(&elt->list, &ps->postfix); + + return 0; +} + +static void postfix_clear(struct filter_parse_state *ps) +{ + struct postfix_elt *elt; + + while (!list_empty(&ps->postfix)) { + elt = list_first_entry(&ps->postfix, struct postfix_elt, list); + kfree(elt->operand); + list_del(&elt->list); + } +} + +static int filter_parse(struct filter_parse_state *ps) +{ + int in_string = 0; + int op, top_op; + char ch; + + while ((ch = infix_next(ps))) { + if (ch == '"') { + in_string ^= 1; continue; } - if (tok_n == 1) { - if (!pred->field_name) - pred->field_name = tok; - else if (!strcmp(tok, "!=")) - pred->not = 1; - else if (!strcmp(tok, "==")) - pred->not = 0; - else { - pred->field_name = NULL; + + if (in_string) + goto parse_operand; + + if (isspace(ch)) + continue; + + if (is_op_char(ps, ch)) { + op = infix_get_op(ps, ch); + if (op == OP_NONE) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); return -EINVAL; } - tok_n = 2; + + if (strlen(curr_operand(ps))) { + postfix_append_operand(ps, curr_operand(ps)); + clear_operand_string(ps); + } + + while (!filter_opstack_empty(ps)) { + top_op = filter_opstack_top(ps); + if (!is_precedence_lower(ps, top_op, op)) { + top_op = filter_opstack_pop(ps); + postfix_append_op(ps, top_op); + continue; + } + break; + } + + filter_opstack_push(ps, op); continue; } - if (tok_n == 2) { - if (pred->compound) { - if (!strcmp(tok, "!=")) - pred->not = 1; - else if (!strcmp(tok, "==")) - pred->not = 0; - else { - pred->field_name = NULL; - return -EINVAL; - } - } else { - val_str = tok; - break; /* done */ + + if (ch == '(') { + filter_opstack_push(ps, OP_OPEN_PAREN); + continue; + } + + if (ch == ')') { + if (strlen(curr_operand(ps))) { + postfix_append_operand(ps, curr_operand(ps)); + clear_operand_string(ps); + } + + top_op = filter_opstack_pop(ps); + while (top_op != OP_NONE) { + if (top_op == OP_OPEN_PAREN) + break; + postfix_append_op(ps, top_op); + top_op = filter_opstack_pop(ps); + } + if (top_op == OP_NONE) { + parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); + return -EINVAL; } - tok_n = 3; continue; } - if (tok_n == 3) { - val_str = tok; - break; /* done */ +parse_operand: + if (append_operand_char(ps, ch)) { + parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0); + return -EINVAL; } } - if (!val_str) { - pred->field_name = NULL; - return -EINVAL; + if (strlen(curr_operand(ps))) + postfix_append_operand(ps, curr_operand(ps)); + + while (!filter_opstack_empty(ps)) { + top_op = filter_opstack_pop(ps); + if (top_op == OP_NONE) + break; + if (top_op == OP_OPEN_PAREN) { + parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); + return -EINVAL; + } + postfix_append_op(ps, top_op); } - pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); - if (!pred->field_name) - return -ENOMEM; + return 0; +} - pred->val = simple_strtoull(val_str, &tmp, 0); - if (tmp == val_str) { - pred->str_val = kstrdup(val_str, GFP_KERNEL); - if (!pred->str_val) - return -ENOMEM; - } else if (*tmp != '\0') +static struct filter_pred *create_pred(int op, char *operand1, char *operand2) +{ + struct filter_pred *pred; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return NULL; + + pred->field_name = kstrdup(operand1, GFP_KERNEL); + if (!pred->field_name) { + kfree(pred); + return NULL; + } + + strcpy(pred->str_val, operand2); + pred->str_len = strlen(operand2); + + pred->op = op; + + return pred; +} + +static struct filter_pred *create_logical_pred(int op) +{ + struct filter_pred *pred; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return NULL; + + pred->op = op; + + return pred; +} + +static int check_preds(struct filter_parse_state *ps) +{ + int n_normal_preds = 0, n_logical_preds = 0; + struct postfix_elt *elt; + + list_for_each_entry(elt, &ps->postfix, list) { + if (elt->op == OP_NONE) + continue; + + if (elt->op == OP_AND || elt->op == OP_OR) { + n_logical_preds++; + continue; + } + n_normal_preds++; + } + + if (!n_normal_preds || n_logical_preds >= n_normal_preds) { + parse_error(ps, FILT_ERR_INVALID_FILTER, 0); return -EINVAL; + } return 0; } +static int replace_preds(struct event_subsystem *system, + struct ftrace_event_call *call, + struct filter_parse_state *ps, + char *filter_string) +{ + char *operand1 = NULL, *operand2 = NULL; + struct filter_pred *pred; + struct postfix_elt *elt; + int err; + + err = check_preds(ps); + if (err) + return err; + + list_for_each_entry(elt, &ps->postfix, list) { + if (elt->op == OP_NONE) { + if (!operand1) + operand1 = elt->operand; + else if (!operand2) + operand2 = elt->operand; + else { + parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); + return -EINVAL; + } + continue; + } + + if (elt->op == OP_AND || elt->op == OP_OR) { + pred = create_logical_pred(elt->op); + if (call) { + err = filter_add_pred(ps, call, pred); + filter_free_pred(pred); + } else + err = filter_add_subsystem_pred(ps, system, + pred, filter_string); + if (err) + return err; + + operand1 = operand2 = NULL; + continue; + } + + if (!operand1 || !operand2) { + parse_error(ps, FILT_ERR_MISSING_FIELD, 0); + return -EINVAL; + } + + pred = create_pred(elt->op, operand1, operand2); + if (call) { + err = filter_add_pred(ps, call, pred); + filter_free_pred(pred); + } else + err = filter_add_subsystem_pred(ps, system, pred, + filter_string); + if (err) + return err; + + operand1 = operand2 = NULL; + } + + return 0; +} + +int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +{ + int err; + + struct filter_parse_state *ps; + + mutex_lock(&filter_mutex); + + if (!strcmp(strstrip(filter_string), "0")) { + filter_disable_preds(call); + remove_filter_string(call->filter); + mutex_unlock(&filter_mutex); + return 0; + } + + err = -ENOMEM; + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto out_unlock; + + filter_disable_preds(call); + replace_filter_string(call->filter, filter_string); + + parse_init(ps, filter_ops, filter_string); + err = filter_parse(ps); + if (err) { + append_filter_err(ps, call->filter); + goto out; + } + + err = replace_preds(NULL, call, ps, filter_string); + if (err) + append_filter_err(ps, call->filter); + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); +out_unlock: + mutex_unlock(&filter_mutex); + + return err; +} + +int apply_subsystem_event_filter(struct event_subsystem *system, + char *filter_string) +{ + int err; + + struct filter_parse_state *ps; + + mutex_lock(&filter_mutex); + + if (!strcmp(strstrip(filter_string), "0")) { + filter_free_subsystem_preds(system); + remove_filter_string(system->filter); + mutex_unlock(&filter_mutex); + return 0; + } + + err = -ENOMEM; + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto out_unlock; + + filter_free_subsystem_preds(system); + replace_filter_string(system->filter, filter_string); + + parse_init(ps, filter_ops, filter_string); + err = filter_parse(ps); + if (err) { + append_filter_err(ps, system->filter); + goto out; + } + + err = replace_preds(system, NULL, ps, filter_string); + if (err) + append_filter_err(ps, system->filter); + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); +out_unlock: + mutex_unlock(&filter_mutex); + + return err; +} diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h deleted file mode 100644 index 38985f9b379c..000000000000 --- a/kernel/trace/trace_events_stage_1.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Stage 1 of the trace events. - * - * Override the macros in <trace/trace_event_types.h> to include the following: - * - * struct ftrace_raw_<call> { - * struct trace_entry ent; - * <type> <item>; - * <type2> <item2>[<len>]; - * [...] - * }; - * - * The <type> <item> is created by the __field(type, item) macro or - * the __array(type2, item2, len) macro. - * We simply do "type item;", and that will create the fields - * in the structure. - */ - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - -#undef __array -#define __array(type, item, len) type item[len]; - -#undef __field -#define __field(type, item) type item; - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name - -#include <trace/trace_event_types.h> diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h deleted file mode 100644 index d363c6672c6c..000000000000 --- a/kernel/trace/trace_events_stage_2.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Stage 2 of the trace events. - * - * Override the macros in <trace/trace_event_types.h> to include the following: - * - * enum print_line_t - * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags) - * { - * struct trace_seq *s = &iter->seq; - * struct ftrace_raw_<call> *field; <-- defined in stage 1 - * struct trace_entry *entry; - * int ret; - * - * entry = iter->ent; - * - * if (entry->type != event_<call>.id) { - * WARN_ON_ONCE(1); - * return TRACE_TYPE_UNHANDLED; - * } - * - * field = (typeof(field))entry; - * - * ret = trace_seq_printf(s, <TP_printk> "\n"); - * if (!ret) - * return TRACE_TYPE_PARTIAL_LINE; - * - * return TRACE_TYPE_HANDLED; - * } - * - * This is the method used to print the raw event to the trace - * output format. Note, this is not needed if the data is read - * in binary. - */ - -#undef __entry -#define __entry field - -#undef TP_printk -#define TP_printk(fmt, args...) fmt "\n", args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ -{ \ - struct trace_seq *s = &iter->seq; \ - struct ftrace_raw_##call *field; \ - struct trace_entry *entry; \ - int ret; \ - \ - entry = iter->ent; \ - \ - if (entry->type != event_##call.id) { \ - WARN_ON_ONCE(1); \ - return TRACE_TYPE_UNHANDLED; \ - } \ - \ - field = (typeof(field))entry; \ - \ - ret = trace_seq_printf(s, #call ": " print); \ - if (!ret) \ - return TRACE_TYPE_PARTIAL_LINE; \ - \ - return TRACE_TYPE_HANDLED; \ -} - -#include <trace/trace_event_types.h> - -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __entry -#define __entry REC - -#undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field; \ - int ret; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include <trace/trace_event_types.h> - -#undef __field -#define __field(type, item) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (ret) \ - return ret; - -#undef __array -#define __array(type, item, len) \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (ret) \ - return ret; - -#define __common_field(type, item) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ - if (ret) \ - return ret; - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -int \ -ftrace_define_fields_##call(void) \ -{ \ - struct ftrace_raw_##call field; \ - struct ftrace_event_call *event_call = &event_##call; \ - int ret; \ - \ - __common_field(unsigned char, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ - \ - tstruct; \ - \ - return ret; \ -} - -#include <trace/trace_event_types.h> diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h deleted file mode 100644 index 9d2fa78cecca..000000000000 --- a/kernel/trace/trace_events_stage_3.h +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Stage 3 of the trace events. - * - * Override the macros in <trace/trace_event_types.h> to include the following: - * - * static void ftrace_event_<call>(proto) - * { - * event_trace_printk(_RET_IP_, "<call>: " <fmt>); - * } - * - * static int ftrace_reg_event_<call>(void) - * { - * int ret; - * - * ret = register_trace_<call>(ftrace_event_<call>); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to <call>"); - * return ret; - * } - * - * static void ftrace_unreg_event_<call>(void) - * { - * unregister_trace_<call>(ftrace_event_<call>); - * } - * - * For those macros defined with TRACE_FORMAT: - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_<call> = { - * .name = "<call>", - * .regfunc = ftrace_reg_event_<call>, - * .unregfunc = ftrace_unreg_event_<call>, - * } - * - * - * For those macros defined with TRACE_EVENT: - * - * static struct ftrace_event_call event_<call>; - * - * static void ftrace_raw_event_<call>(proto) - * { - * struct ring_buffer_event *event; - * struct ftrace_raw_<call> *entry; <-- defined in stage 1 - * unsigned long irq_flags; - * int pc; - * - * local_save_flags(irq_flags); - * pc = preempt_count(); - * - * event = trace_current_buffer_lock_reserve(event_<call>.id, - * sizeof(struct ftrace_raw_<call>), - * irq_flags, pc); - * if (!event) - * return; - * entry = ring_buffer_event_data(event); - * - * <assign>; <-- Here we assign the entries by the __field and - * __array macros. - * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); - * } - * - * static int ftrace_raw_reg_event_<call>(void) - * { - * int ret; - * - * ret = register_trace_<call>(ftrace_raw_event_<call>); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to <call>"); - * return ret; - * } - * - * static void ftrace_unreg_event_<call>(void) - * { - * unregister_trace_<call>(ftrace_raw_event_<call>); - * } - * - * static struct trace_event ftrace_event_type_<call> = { - * .trace = ftrace_raw_output_<call>, <-- stage 2 - * }; - * - * static int ftrace_raw_init_event_<call>(void) - * { - * int id; - * - * id = register_ftrace_event(&ftrace_event_type_<call>); - * if (!id) - * return -ENODEV; - * event_<call>.id = id; - * return 0; - * } - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_<call> = { - * .name = "<call>", - * .system = "<system>", - * .raw_init = ftrace_raw_init_event_<call>, - * .regfunc = ftrace_reg_event_<call>, - * .unregfunc = ftrace_unreg_event_<call>, - * .show_format = ftrace_format_<call>, - * } - * - */ - -#undef TP_FMT -#define TP_FMT(fmt, args...) fmt "\n", ##args - -#ifdef CONFIG_EVENT_PROFILE -#define _TRACE_PROFILE(call, proto, args) \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int); \ - perf_tpcounter_event(event_##call.id); \ -} \ - \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ -{ \ - int ret = 0; \ - \ - if (!atomic_inc_return(&call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ -} \ - \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ -{ \ - if (atomic_add_negative(-1, &call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ -} - -#define _TRACE_PROFILE_INIT(call) \ - .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = ftrace_profile_enable_##call, \ - .profile_disable = ftrace_profile_disable_##call, - -#else -#define _TRACE_PROFILE(call, proto, args) -#define _TRACE_PROFILE_INIT(call) -#endif - -#define _TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, #call ": " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call event_##call; \ - \ -static int ftrace_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(NULL); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - return 0; \ -} - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_init_event_##call, \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - -#undef __entry -#define __entry entry - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ - \ -static struct ftrace_event_call event_##call; \ - \ -static void ftrace_raw_event_##call(proto) \ -{ \ - struct ftrace_event_call *call = &event_##call; \ - struct ring_buffer_event *event; \ - struct ftrace_raw_##call *entry; \ - unsigned long irq_flags; \ - int pc; \ - \ - local_save_flags(irq_flags); \ - pc = preempt_count(); \ - \ - event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ - irq_flags, pc); \ - if (!event) \ - return; \ - entry = ring_buffer_event_data(event); \ - \ - assign; \ - \ - if (call->preds && !filter_match_preds(call, entry)) \ - ring_buffer_event_discard(event); \ - \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ - \ -} \ - \ -static int ftrace_raw_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_raw_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_raw_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_raw_event_##call); \ -} \ - \ -static struct trace_event ftrace_event_type_##call = { \ - .trace = ftrace_raw_output_##call, \ -}; \ - \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(&ftrace_event_type_##call); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - INIT_LIST_HEAD(&event_##call.fields); \ - return 0; \ -} \ - \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ - .regfunc = ftrace_raw_reg_event_##call, \ - .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - -#include <trace/trace_event_types.h> - -#undef _TRACE_PROFILE -#undef _TRACE_PROFILE_INIT - diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 07a22c33ebf3..d06cf898dc86 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -19,8 +19,12 @@ #undef TRACE_STRUCT #define TRACE_STRUCT(args...) args +extern void __bad_type_size(void); + #undef TRACE_FIELD #define TRACE_FIELD(type, item, assign) \ + if (sizeof(type) != sizeof(field.item)) \ + __bad_type_size(); \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ @@ -30,7 +34,7 @@ #undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ +#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ @@ -46,6 +50,9 @@ if (!ret) \ return 0; +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + TRACE_FIELD(type, item, assign) #undef TP_RAW_FMT #define TP_RAW_FMT(args...) args @@ -65,6 +72,22 @@ ftrace_format_##call(struct trace_seq *s) \ return ret; \ } +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct args field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + \ + return ret; \ +} + #include "trace_event_types.h" #undef TRACE_ZERO_CHAR @@ -78,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + TRACE_FIELD(type, item, assign) + #undef TP_CMD #define TP_CMD(cmd...) cmd @@ -85,18 +112,95 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_ENTRY entry #undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ +#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ cmd; #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int ftrace_define_fields_##call(void); \ +static int ftrace_raw_init_event_##call(void); \ + \ +struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .id = proto, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_raw_init_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ +}; \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ + return 0; \ +} \ + +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) \ \ -static struct ftrace_event_call __used \ +struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .id = proto, \ .system = __stringify(TRACE_SYSTEM), \ .show_format = ftrace_format_##call, \ +}; + +#include "trace_event_types.h" + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), 0); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_ZERO_CHAR +#define TRACE_FIELD_ZERO_CHAR(item) + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + struct args field; \ + int ret; \ + \ + __common_field(unsigned char, type, 0); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ + \ + tstruct; \ + \ + return ret; \ } + +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) + #include "trace_event_types.h" diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d28687e7b3a7..8b592418d8b2 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) if (!current->ret_stack) return -EBUSY; + /* + * We must make sure the ret_stack is tested before we read + * anything else. + */ + smp_rmb(); + /* The return trace stack is full */ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { atomic_inc(¤t->trace_overrun); @@ -78,13 +84,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; + current->ret_stack[index].subtime = 0; *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -void +static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; @@ -104,9 +111,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; - barrier(); - current->curr_ret_stack--; - } /* @@ -121,6 +125,8 @@ unsigned long ftrace_return_to_handler(void) ftrace_pop_return_trace(&trace, &ret); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); + barrier(); + current->curr_ret_stack--; if (unlikely(!ret)) { ftrace_graph_stop(); @@ -426,8 +432,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_HANDLED; } -static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s) +enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) { unsigned long nsecs_rem = do_div(duration, 1000); /* log10(ULONG_MAX) + '\0' */ @@ -464,12 +470,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) if (!ret) return TRACE_TYPE_PARTIAL_LINE; } + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_duration(unsigned long long duration, struct trace_seq *s) +{ + int ret; + + ret = trace_print_graph_duration(duration, s); + if (ret != TRACE_TYPE_HANDLED) + return ret; ret = trace_seq_printf(s, "| "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; + return TRACE_TYPE_HANDLED; } /* Case of a leaf function on its call entry */ diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 7bfdf4c2347f..ca7d7c4d0c2a 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -1,10 +1,9 @@ /* - * h/w branch tracer for x86 based on bts + * h/w branch tracer for x86 based on BTS * * Copyright (C) 2008-2009 Intel Corporation. * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 */ -#include <linux/spinlock.h> #include <linux/kallsyms.h> #include <linux/debugfs.h> #include <linux/ftrace.h> @@ -15,110 +14,119 @@ #include <asm/ds.h> -#include "trace.h" #include "trace_output.h" +#include "trace.h" -#define SIZEOF_BTS (1 << 13) +#define BTS_BUFFER_SIZE (1 << 13) -/* - * The tracer lock protects the below per-cpu tracer array. - * It needs to be held to: - * - start tracing on all cpus - * - stop tracing on all cpus - * - start tracing on a single hotplug cpu - * - stop tracing on a single hotplug cpu - * - read the trace from all cpus - * - read the trace from a single cpu - */ -static DEFINE_SPINLOCK(bts_tracer_lock); static DEFINE_PER_CPU(struct bts_tracer *, tracer); -static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); +static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); #define this_tracer per_cpu(tracer, smp_processor_id()) -#define this_buffer per_cpu(buffer, smp_processor_id()) -static int __read_mostly trace_hw_branches_enabled; +static int trace_hw_branches_enabled __read_mostly; +static int trace_hw_branches_suspended __read_mostly; static struct trace_array *hw_branch_trace __read_mostly; -/* - * Start tracing on the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_start_cpu(void *arg) +static void bts_trace_init_cpu(int cpu) { - if (this_tracer) - ds_release_bts(this_tracer); - - this_tracer = - ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, - /* ovfl = */ NULL, /* th = */ (size_t)-1, - BTS_KERNEL); - if (IS_ERR(this_tracer)) { - this_tracer = NULL; - return; - } + per_cpu(tracer, cpu) = + ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + + if (IS_ERR(per_cpu(tracer, cpu))) + per_cpu(tracer, cpu) = NULL; } -static void bts_trace_start(struct trace_array *tr) +static int bts_trace_init(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; + + hw_branch_trace = tr; + trace_hw_branches_enabled = 0; - on_each_cpu(bts_trace_start_cpu, NULL, 1); - trace_hw_branches_enabled = 1; + get_online_cpus(); + for_each_online_cpu(cpu) { + bts_trace_init_cpu(cpu); - spin_unlock(&bts_tracer_lock); + if (likely(per_cpu(tracer, cpu))) + trace_hw_branches_enabled = 1; + } + trace_hw_branches_suspended = 0; + put_online_cpus(); + + /* If we could not enable tracing on a single cpu, we fail. */ + return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP; } -/* - * Stop tracing on the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_stop_cpu(void *arg) +static void bts_trace_reset(struct trace_array *tr) { - if (this_tracer) { - ds_release_bts(this_tracer); - this_tracer = NULL; + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) { + if (likely(per_cpu(tracer, cpu))) { + ds_release_bts(per_cpu(tracer, cpu)); + per_cpu(tracer, cpu) = NULL; + } } + trace_hw_branches_enabled = 0; + trace_hw_branches_suspended = 0; + put_online_cpus(); } -static void bts_trace_stop(struct trace_array *tr) +static void bts_trace_start(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; - trace_hw_branches_enabled = 0; - on_each_cpu(bts_trace_stop_cpu, NULL, 1); + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_resume_bts(per_cpu(tracer, cpu)); + trace_hw_branches_suspended = 0; + put_online_cpus(); +} - spin_unlock(&bts_tracer_lock); +static void bts_trace_stop(struct trace_array *tr) +{ + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); + trace_hw_branches_suspended = 1; + put_online_cpus(); } static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; - - spin_lock(&bts_tracer_lock); - - if (!trace_hw_branches_enabled) - goto out; + int cpu = (long)hcpu; switch (action) { case CPU_ONLINE: case CPU_DOWN_FAILED: - smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); + /* The notification is sent with interrupts enabled. */ + if (trace_hw_branches_enabled) { + bts_trace_init_cpu(cpu); + + if (trace_hw_branches_suspended && + likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); + } break; + case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); - break; + /* The notification is sent with interrupts enabled. */ + if (likely(per_cpu(tracer, cpu))) { + ds_release_bts(per_cpu(tracer, cpu)); + per_cpu(tracer, cpu) = NULL; + } } - out: - spin_unlock(&bts_tracer_lock); return NOTIFY_DONE; } @@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { .notifier_call = bts_hotcpu_handler }; -static int bts_trace_init(struct trace_array *tr) -{ - hw_branch_trace = tr; - - bts_trace_start(tr); - - return 0; -} - -static void bts_trace_reset(struct trace_array *tr) -{ - bts_trace_stop(tr); -} - static void bts_trace_print_header(struct seq_file *m) { seq_puts(m, "# CPU# TO <- FROM\n"); @@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m) static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) { + unsigned long symflags = TRACE_ITER_SYM_OFFSET; struct trace_entry *entry = iter->ent; struct trace_seq *seq = &iter->seq; struct hw_branch_entry *it; - unsigned long symflags = TRACE_ITER_SYM_OFFSET; trace_assign_type(it, entry); @@ -168,6 +162,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) void trace_hw_branch(u64 from, u64 to) { + struct ftrace_event_call *call = &event_hw_branch; struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; @@ -194,7 +189,8 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: atomic_dec(&tr->data[cpu]->disabled); @@ -224,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) /* * Collect the trace on the current cpu and write it into the ftrace buffer. * - * pre: bts_tracer_lock must be locked + * pre: tracing must be suspended on the current cpu */ static void trace_bts_cpu(void *arg) { - struct trace_array *tr = (struct trace_array *) arg; + struct trace_array *tr = (struct trace_array *)arg; const struct bts_trace *trace; unsigned char *at; @@ -241,10 +237,9 @@ static void trace_bts_cpu(void *arg) if (unlikely(!this_tracer)) return; - ds_suspend_bts(this_tracer); trace = ds_read_bts(this_tracer); if (!trace) - goto out; + return; for (at = trace->ds.top; (void *)at < trace->ds.end; at += trace->ds.size) @@ -253,18 +248,27 @@ static void trace_bts_cpu(void *arg) for (at = trace->ds.begin; (void *)at < trace->ds.top; at += trace->ds.size) trace_bts_at(trace, at); - -out: - ds_resume_bts(this_tracer); } static void trace_bts_prepare(struct trace_iterator *iter) { - spin_lock(&bts_tracer_lock); + int cpu; + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); + /* + * We need to collect the trace on the respective cpu since ftrace + * implicitly adds the record for the current cpu. + * Once that is more flexible, we could collect the data from any cpu. + */ on_each_cpu(trace_bts_cpu, iter->tr, 1); - spin_unlock(&bts_tracer_lock); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_resume_bts(per_cpu(tracer, cpu)); + put_online_cpus(); } static void trace_bts_close(struct trace_iterator *iter) @@ -274,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter) void trace_hw_branch_oops(void) { - spin_lock(&bts_tracer_lock); - - trace_bts_cpu(hw_branch_trace); - - spin_unlock(&bts_tracer_lock); + if (this_tracer) { + ds_suspend_bts_noirq(this_tracer); + trace_bts_cpu(hw_branch_trace); + ds_resume_bts_noirq(this_tracer); + } } struct tracer bts_tracer __read_mostly = @@ -291,7 +295,10 @@ struct tracer bts_tracer __read_mostly = .start = bts_trace_start, .stop = bts_trace_stop, .open = trace_bts_prepare, - .close = trace_bts_close + .close = trace_bts_close, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_hw_branches, +#endif /* CONFIG_FTRACE_SELFTEST */ }; __init static int init_bts_trace(void) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 8e37fcddd8b4..d53b45ed0806 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -9,6 +9,8 @@ #include <linux/kernel.h> #include <linux/mmiotrace.h> #include <linux/pci.h> +#include <linux/time.h> + #include <asm/atomic.h> #include "trace.h" @@ -174,7 +176,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) struct mmiotrace_rw *rw; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; int ret = 1; @@ -221,7 +223,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter) struct mmiotrace_map *m; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; int ret; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 64b54a59c55b..7938f3ae93e3 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -14,11 +14,25 @@ /* must be a power of 2 */ #define EVENT_HASHSIZE 128 -static DEFINE_MUTEX(trace_event_mutex); +DECLARE_RWSEM(trace_event_mutex); + +DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq); +EXPORT_PER_CPU_SYMBOL(ftrace_event_seq); + static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; +void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ + int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + + s->buffer[len] = 0; + seq_puts(m, s->buffer); + + trace_seq_init(s); +} + enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -84,6 +98,39 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } +EXPORT_SYMBOL_GPL(trace_seq_printf); + +/** + * trace_seq_vprintf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (!len) + return 0; + + ret = vsnprintf(s->buffer + s->len, len, fmt, args); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) + return 0; + + s->len += ret; + + return len; +} +EXPORT_SYMBOL_GPL(trace_seq_vprintf); int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { @@ -201,6 +248,67 @@ int trace_seq_path(struct trace_seq *s, struct path *path) return 0; } +const char * +ftrace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array) +{ + unsigned long mask; + const char *str; + const char *ret = p->buffer + p->len; + int i; + + for (i = 0; flag_array[i].name && flags; i++) { + + mask = flag_array[i].mask; + if ((flags & mask) != mask) + continue; + + str = flag_array[i].name; + flags &= ~mask; + if (p->len && delim) + trace_seq_puts(p, delim); + trace_seq_puts(p, str); + } + + /* check for left over flags */ + if (flags) { + if (p->len && delim) + trace_seq_puts(p, delim); + trace_seq_printf(p, "0x%lx", flags); + } + + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(ftrace_print_flags_seq); + +const char * +ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array) +{ + int i; + const char *ret = p->buffer + p->len; + + for (i = 0; symbol_array[i].name; i++) { + + if (val != symbol_array[i].mask) + continue; + + trace_seq_puts(p, symbol_array[i].name); + break; + } + + if (!p->len) + trace_seq_printf(p, "0x%lx", val); + + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(ftrace_print_symbols_seq); + #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { @@ -311,17 +419,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, if (ip == ULONG_MAX || !ret) break; - if (i && ret) - ret = trace_seq_puts(s, " <- "); + if (ret) + ret = trace_seq_puts(s, " => "); if (!ip) { if (ret) ret = trace_seq_puts(s, "??"); + if (ret) + ret = trace_seq_puts(s, "\n"); continue; } if (!ret) break; if (ret) ret = seq_print_user_ip(s, mm, ip, sym_flags); + ret = trace_seq_puts(s, "\n"); } if (mm) @@ -455,6 +566,7 @@ static int task_state_char(unsigned long state) * @type: the type of event to look for * * Returns an event of type @type otherwise NULL + * Called with trace_event_read_lock() held. */ struct trace_event *ftrace_find_event(int type) { @@ -464,7 +576,7 @@ struct trace_event *ftrace_find_event(int type) key = type & (EVENT_HASHSIZE - 1); - hlist_for_each_entry_rcu(event, n, &event_hash[key], node) { + hlist_for_each_entry(event, n, &event_hash[key], node) { if (event->type == type) return event; } @@ -472,6 +584,46 @@ struct trace_event *ftrace_find_event(int type) return NULL; } +static LIST_HEAD(ftrace_event_list); + +static int trace_search_list(struct list_head **list) +{ + struct trace_event *e; + int last = __TRACE_LAST_TYPE; + + if (list_empty(&ftrace_event_list)) { + *list = &ftrace_event_list; + return last + 1; + } + + /* + * We used up all possible max events, + * lets see if somebody freed one. + */ + list_for_each_entry(e, &ftrace_event_list, list) { + if (e->type != last + 1) + break; + last++; + } + + /* Did we used up all 65 thousand events??? */ + if ((last + 1) > FTRACE_MAX_EVENT) + return 0; + + *list = &e->list; + return last + 1; +} + +void trace_event_read_lock(void) +{ + down_read(&trace_event_mutex); +} + +void trace_event_read_unlock(void) +{ + up_read(&trace_event_mutex); +} + /** * register_ftrace_event - register output for an event type * @event: the event type to register @@ -492,22 +644,42 @@ int register_ftrace_event(struct trace_event *event) unsigned key; int ret = 0; - mutex_lock(&trace_event_mutex); + down_write(&trace_event_mutex); - if (!event) { - ret = next_event_type++; + if (WARN_ON(!event)) goto out; - } - if (!event->type) - event->type = next_event_type++; - else if (event->type > __TRACE_LAST_TYPE) { + INIT_LIST_HEAD(&event->list); + + if (!event->type) { + struct list_head *list = NULL; + + if (next_event_type > FTRACE_MAX_EVENT) { + + event->type = trace_search_list(&list); + if (!event->type) + goto out; + + } else { + + event->type = next_event_type++; + list = &ftrace_event_list; + } + + if (WARN_ON(ftrace_find_event(event->type))) + goto out; + + list_add_tail(&event->list, list); + + } else if (event->type > __TRACE_LAST_TYPE) { printk(KERN_WARNING "Need to add type to trace.h\n"); WARN_ON(1); - } - - if (ftrace_find_event(event->type)) goto out; + } else { + /* Is this event already used */ + if (ftrace_find_event(event->type)) + goto out; + } if (event->trace == NULL) event->trace = trace_nop_print; @@ -520,14 +692,25 @@ int register_ftrace_event(struct trace_event *event) key = event->type & (EVENT_HASHSIZE - 1); - hlist_add_head_rcu(&event->node, &event_hash[key]); + hlist_add_head(&event->node, &event_hash[key]); ret = event->type; out: - mutex_unlock(&trace_event_mutex); + up_write(&trace_event_mutex); return ret; } +EXPORT_SYMBOL_GPL(register_ftrace_event); + +/* + * Used by module code with the trace_event_mutex held for write. + */ +int __unregister_ftrace_event(struct trace_event *event) +{ + hlist_del(&event->node); + list_del(&event->list); + return 0; +} /** * unregister_ftrace_event - remove a no longer used event @@ -535,12 +718,13 @@ int register_ftrace_event(struct trace_event *event) */ int unregister_ftrace_event(struct trace_event *event) { - mutex_lock(&trace_event_mutex); - hlist_del(&event->node); - mutex_unlock(&trace_event_mutex); + down_write(&trace_event_mutex); + __unregister_ftrace_event(event); + up_write(&trace_event_mutex); return 0; } +EXPORT_SYMBOL_GPL(unregister_ftrace_event); /* * Standard events @@ -833,14 +1017,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); + if (!trace_seq_puts(s, "<stack trace>\n")) + goto partial; for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { - if (i) { - if (!trace_seq_puts(s, " <= ")) - goto partial; + if (!field->caller[i] || (field->caller[i] == ULONG_MAX)) + break; + if (!trace_seq_puts(s, " => ")) + goto partial; - if (!seq_print_ip_sym(s, field->caller[i], flags)) - goto partial; - } + if (!seq_print_ip_sym(s, field->caller[i], flags)) + goto partial; if (!trace_seq_puts(s, "\n")) goto partial; } @@ -868,10 +1054,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - if (!seq_print_userip_objs(field, s, flags)) + if (!trace_seq_puts(s, "<user stack trace>\n")) goto partial; - if (!trace_seq_putc(s, '\n')) + if (!seq_print_userip_objs(field, s, flags)) goto partial; return TRACE_TYPE_HANDLED; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index e0bde39c2dd9..d38bec4a9c30 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -1,41 +1,17 @@ #ifndef __TRACE_EVENTS_H #define __TRACE_EVENTS_H +#include <linux/trace_seq.h> #include "trace.h" -typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, - int flags); - -struct trace_event { - struct hlist_node node; - int type; - trace_print_func trace; - trace_print_func raw; - trace_print_func hex; - trace_print_func binary; -}; - extern enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter); -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); -extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt); -extern int trace_seq_puts(struct trace_seq *s, const char *str); -extern int trace_seq_putc(struct trace_seq *s, unsigned char c); -extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); -extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, - size_t len); -extern void *trace_seq_reserve(struct trace_seq *s, size_t len); -extern int trace_seq_path(struct trace_seq *s, struct path *path); extern int seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, unsigned long sym_flags); extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, @@ -44,13 +20,17 @@ extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator *iter); +extern void trace_event_read_lock(void); +extern void trace_event_read_unlock(void); extern struct trace_event *ftrace_find_event(int type); -extern int register_ftrace_event(struct trace_event *event); -extern int unregister_ftrace_event(struct trace_event *event); extern enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); +/* used by module unregistering */ +extern int __unregister_ftrace_event(struct trace_event *event); +extern struct rw_semaphore trace_event_mutex; + #define MAX_MEMHEX_BYTES 8 #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index 118439709fb7..8a30d9874cd4 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type, static void probe_power_end(struct power_trace *it) { + struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; @@ -54,7 +55,8 @@ static void probe_power_end(struct power_trace *it) goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } @@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it) static void probe_power_mark(struct power_trace *it, unsigned int type, unsigned int level) { + struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; @@ -84,7 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index eb81556107fe..9bece9687b62 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = { static __init int init_trace_printk_function_export(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) return 0; - entry = debugfs_create_file("printk_formats", 0444, d_tracer, + trace_create_file("printk_formats", 0444, d_tracer, NULL, &ftrace_formats_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'printk_formats' entry\n"); return 0; } diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 9117cea6f1ae..a98106dd979c 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -10,7 +10,7 @@ #include <linux/kallsyms.h> #include <linux/uaccess.h> #include <linux/ftrace.h> -#include <trace/sched.h> +#include <trace/events/sched.h> #include "trace.h" @@ -29,13 +29,13 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, int cpu; int pc; - if (!sched_ref || sched_stopped) + if (unlikely(!sched_ref)) return; tracing_record_cmdline(prev); tracing_record_cmdline(next); - if (!tracer_enabled) + if (!tracer_enabled || sched_stopped) return; pc = preempt_count(); @@ -56,15 +56,15 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) unsigned long flags; int cpu, pc; - if (!likely(tracer_enabled)) + if (unlikely(!sched_ref)) return; - pc = preempt_count(); tracing_record_cmdline(current); - if (sched_stopped) + if (!tracer_enabled || sched_stopped) return; + pc = preempt_count(); local_irq_save(flags); cpu = raw_smp_processor_id(); data = ctx_trace->data[cpu]; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5bc00e8f153e..eacb27225173 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,7 +15,7 @@ #include <linux/kallsyms.h> #include <linux/uaccess.h> #include <linux/ftrace.h> -#include <trace/sched.h> +#include <trace/events/sched.h> #include "trace.h" @@ -138,9 +138,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, pc = preempt_count(); - /* The task we are waiting for is waking up */ - data = wakeup_trace->data[wakeup_cpu]; - /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); @@ -154,6 +151,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; + /* The task we are waiting for is waking up */ + data = wakeup_trace->data[wakeup_cpu]; + trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 08f4eb2763d1..00dd6485bdd7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_BRANCH: case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: + case TRACE_HW_BRANCHES: return 1; } return 0; @@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, #else # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) #endif /* CONFIG_DYNAMIC_FTRACE */ + /* * Simple verification test of ftrace function tracer. * Enable ftrace, sleep 1/10 second, and then read the trace @@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) return ret; } #endif /* CONFIG_BRANCH_TRACER */ + +#ifdef CONFIG_HW_BRANCH_TRACER +int +trace_selftest_startup_hw_branches(struct tracer *trace, + struct trace_array *tr) +{ + struct trace_iterator *iter; + struct tracer tracer; + unsigned long count; + int ret; + + if (!trace->open) { + printk(KERN_CONT "missing open function..."); + return -1; + } + + ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + + /* + * The hw-branch tracer needs to collect the trace from the various + * cpu trace buffers - before tracing is stopped. + */ + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + memcpy(&tracer, trace, sizeof(tracer)); + + iter->trace = &tracer; + iter->tr = tr; + iter->pos = -1; + mutex_init(&iter->mutex); + + trace->open(iter); + + mutex_destroy(&iter->mutex); + kfree(iter); + + tracing_stop(); + + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + tracing_start(); + + if (!ret && !count) { + printk(KERN_CONT "no entries found.."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_HW_BRANCH_TRACER */ diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c750f65f9661..2d7aebd71dbd 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -265,7 +265,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries); + max_stack_trace.nr_entries - 1); if (!stack_tracer_enabled && !max_stack_size) print_disabled(m); @@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace); static __init int stack_trace_init(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("stack_max_size", 0644, d_tracer, - &max_stack_size, &stack_max_size_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_max_size' entry\n"); + trace_create_file("stack_max_size", 0644, d_tracer, + &max_stack_size, &stack_max_size_fops); - entry = debugfs_create_file("stack_trace", 0444, d_tracer, - NULL, &stack_trace_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_trace' entry\n"); + trace_create_file("stack_trace", 0444, d_tracer, + NULL, &stack_trace_fops); if (stack_tracer_enabled) register_ftrace_function(&trace_ops); diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index acdebd771a93..c00643733f4c 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -1,7 +1,7 @@ /* * Infrastructure for statistic tracing (histogram output). * - * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> + * Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com> * * Based on the code from trace_branch.c which is * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> @@ -10,22 +10,27 @@ #include <linux/list.h> +#include <linux/rbtree.h> #include <linux/debugfs.h> #include "trace_stat.h" #include "trace.h" -/* List of stat entries from a tracer */ -struct trace_stat_list { - struct list_head list; +/* + * List of stat red-black nodes from a tracer + * We use a such tree to sort quickly the stat + * entries from the tracer. + */ +struct stat_node { + struct rb_node node; void *stat; }; /* A stat session is the stats output in one file */ -struct tracer_stat_session { +struct stat_session { struct list_head session_list; struct tracer_stat *ts; - struct list_head stat_list; + struct rb_root stat_root; struct mutex stat_mutex; struct dentry *file; }; @@ -37,18 +42,48 @@ static DEFINE_MUTEX(all_stat_sessions_mutex); /* The root directory for all stat files */ static struct dentry *stat_dir; +/* + * Iterate through the rbtree using a post order traversal path + * to release the next node. + * It won't necessary release one at each iteration + * but it will at least advance closer to the next one + * to be released. + */ +static struct rb_node *release_next(struct rb_node *node) +{ + struct stat_node *snode; + struct rb_node *parent = rb_parent(node); + + if (node->rb_left) + return node->rb_left; + else if (node->rb_right) + return node->rb_right; + else { + if (!parent) + ; + else if (parent->rb_left == node) + parent->rb_left = NULL; + else + parent->rb_right = NULL; + + snode = container_of(node, struct stat_node, node); + kfree(snode); + + return parent; + } +} -static void reset_stat_session(struct tracer_stat_session *session) +static void reset_stat_session(struct stat_session *session) { - struct trace_stat_list *node, *next; + struct rb_node *node = session->stat_root.rb_node; - list_for_each_entry_safe(node, next, &session->stat_list, list) - kfree(node); + while (node) + node = release_next(node); - INIT_LIST_HEAD(&session->stat_list); + session->stat_root = RB_ROOT; } -static void destroy_session(struct tracer_stat_session *session) +static void destroy_session(struct stat_session *session) { debugfs_remove(session->file); reset_stat_session(session); @@ -56,25 +91,60 @@ static void destroy_session(struct tracer_stat_session *session) kfree(session); } +typedef int (*cmp_stat_t)(void *, void *); + +static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct stat_node *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->stat = stat; + + /* + * Figure out where to put new node + * This is a descendent sorting + */ + while (*new) { + struct stat_node *this; + int result; + + this = container_of(*new, struct stat_node, node); + result = cmp(data->stat, this->stat); + + parent = *new; + if (result >= 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); + return 0; +} + /* * For tracers that don't provide a stat_cmp callback. - * This one will force an immediate insertion on tail of - * the list. + * This one will force an insertion as right-most node + * in the rbtree. */ static int dummy_cmp(void *p1, void *p2) { - return 1; + return -1; } /* - * Initialize the stat list at each trace_stat file opening. + * Initialize the stat rbtree at each trace_stat file opening. * All of these copies and sorting are required on all opening * since the stats could have changed between two file sessions. */ -static int stat_seq_init(struct tracer_stat_session *session) +static int stat_seq_init(struct stat_session *session) { - struct trace_stat_list *iter_entry, *new_entry; struct tracer_stat *ts = session->ts; + struct rb_root *root = &session->stat_root; void *stat; int ret = 0; int i; @@ -85,29 +155,16 @@ static int stat_seq_init(struct tracer_stat_session *session) if (!ts->stat_cmp) ts->stat_cmp = dummy_cmp; - stat = ts->stat_start(); + stat = ts->stat_start(ts); if (!stat) goto exit; - /* - * The first entry. Actually this is the second, but the first - * one (the stat_list head) is pointless. - */ - new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); - if (!new_entry) { - ret = -ENOMEM; + ret = insert_stat(root, stat, ts->stat_cmp); + if (ret) goto exit; - } - - INIT_LIST_HEAD(&new_entry->list); - - list_add(&new_entry->list, &session->stat_list); - - new_entry->stat = stat; /* - * Iterate over the tracer stat entries and store them in a sorted - * list. + * Iterate over the tracer stat entries and store them in an rbtree. */ for (i = 1; ; i++) { stat = ts->stat_next(stat, i); @@ -116,36 +173,16 @@ static int stat_seq_init(struct tracer_stat_session *session) if (!stat) break; - new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); - if (!new_entry) { - ret = -ENOMEM; - goto exit_free_list; - } - - INIT_LIST_HEAD(&new_entry->list); - new_entry->stat = stat; - - list_for_each_entry_reverse(iter_entry, &session->stat_list, - list) { - - /* Insertion with a descendent sorting */ - if (ts->stat_cmp(iter_entry->stat, - new_entry->stat) >= 0) { - - list_add(&new_entry->list, &iter_entry->list); - break; - } - } - - /* The current larger value */ - if (list_empty(&new_entry->list)) - list_add(&new_entry->list, &session->stat_list); + ret = insert_stat(root, stat, ts->stat_cmp); + if (ret) + goto exit_free_rbtree; } + exit: mutex_unlock(&session->stat_mutex); return ret; -exit_free_list: +exit_free_rbtree: reset_stat_session(session); mutex_unlock(&session->stat_mutex); return ret; @@ -154,38 +191,51 @@ exit_free_list: static void *stat_seq_start(struct seq_file *s, loff_t *pos) { - struct tracer_stat_session *session = s->private; + struct stat_session *session = s->private; + struct rb_node *node; + int i; - /* Prevent from tracer switch or stat_list modification */ + /* Prevent from tracer switch or rbtree modification */ mutex_lock(&session->stat_mutex); /* If we are in the beginning of the file, print the headers */ - if (!*pos && session->ts->stat_headers) + if (!*pos && session->ts->stat_headers) { + (*pos)++; return SEQ_START_TOKEN; + } - return seq_list_start(&session->stat_list, *pos); + node = rb_first(&session->stat_root); + for (i = 0; node && i < *pos; i++) + node = rb_next(node); + + (*pos)++; + + return node; } static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) { - struct tracer_stat_session *session = s->private; + struct stat_session *session = s->private; + struct rb_node *node = p; + + (*pos)++; if (p == SEQ_START_TOKEN) - return seq_list_start(&session->stat_list, *pos); + return rb_first(&session->stat_root); - return seq_list_next(p, &session->stat_list, pos); + return rb_next(node); } static void stat_seq_stop(struct seq_file *s, void *p) { - struct tracer_stat_session *session = s->private; + struct stat_session *session = s->private; mutex_unlock(&session->stat_mutex); } static int stat_seq_show(struct seq_file *s, void *v) { - struct tracer_stat_session *session = s->private; - struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); + struct stat_session *session = s->private; + struct stat_node *l = container_of(v, struct stat_node, node); if (v == SEQ_START_TOKEN) return session->ts->stat_headers(s); @@ -205,7 +255,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file) { int ret; - struct tracer_stat_session *session = inode->i_private; + struct stat_session *session = inode->i_private; ret = seq_open(file, &trace_stat_seq_ops); if (!ret) { @@ -218,11 +268,11 @@ static int tracing_stat_open(struct inode *inode, struct file *file) } /* - * Avoid consuming memory with our now useless list. + * Avoid consuming memory with our now useless rbtree. */ static int tracing_stat_release(struct inode *i, struct file *f) { - struct tracer_stat_session *session = i->i_private; + struct stat_session *session = i->i_private; mutex_lock(&session->stat_mutex); reset_stat_session(session); @@ -251,7 +301,7 @@ static int tracing_stat_init(void) return 0; } -static int init_stat_file(struct tracer_stat_session *session) +static int init_stat_file(struct stat_session *session) { if (!stat_dir && tracing_stat_init()) return -ENODEV; @@ -266,7 +316,7 @@ static int init_stat_file(struct tracer_stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { - struct tracer_stat_session *session, *node, *tmp; + struct stat_session *session, *node; int ret; if (!trace) @@ -277,7 +327,7 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); - list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { + list_for_each_entry(node, &all_stat_sessions, session_list) { if (node->ts == trace) { mutex_unlock(&all_stat_sessions_mutex); return -EINVAL; @@ -286,15 +336,13 @@ int register_stat_tracer(struct tracer_stat *trace) mutex_unlock(&all_stat_sessions_mutex); /* Init the session */ - session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL); + session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) return -ENOMEM; session->ts = trace; INIT_LIST_HEAD(&session->session_list); - INIT_LIST_HEAD(&session->stat_list); mutex_init(&session->stat_mutex); - session->file = NULL; ret = init_stat_file(session); if (ret) { @@ -312,7 +360,7 @@ int register_stat_tracer(struct tracer_stat *trace) void unregister_stat_tracer(struct tracer_stat *trace) { - struct tracer_stat_session *node, *tmp; + struct stat_session *node, *tmp; mutex_lock(&all_stat_sessions_mutex); list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h index 202274cf7f3d..f3546a2cd826 100644 --- a/kernel/trace/trace_stat.h +++ b/kernel/trace/trace_stat.h @@ -12,7 +12,7 @@ struct tracer_stat { /* The name of your stat file */ const char *name; /* Iteration over statistic entries */ - void *(*stat_start)(void); + void *(*stat_start)(struct tracer_stat *trace); void *(*stat_next)(void *prev, int idx); /* Compare two entries for stats sorting */ int (*stat_cmp)(void *p1, void *p2); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 91fd19c2149f..e04b76cc238a 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = { void init_tracer_sysprof_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("sysprof_sample_period", 0644, + trace_create_file("sysprof_sample_period", 0644, d_tracer, NULL, &sysprof_sample_fops); - if (entry) - return; - pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n"); } diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 797201e4a137..97fcea4acce1 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -6,7 +6,7 @@ */ -#include <trace/workqueue.h> +#include <trace/events/workqueue.h> #include <linux/list.h> #include <linux/percpu.h> #include "trace_stat.h" @@ -16,8 +16,6 @@ /* A cpu workqueue thread */ struct cpu_workqueue_stats { struct list_head list; -/* Useful to know if we print the cpu headers */ - bool first_entry; int cpu; pid_t pid; /* Can be inserted from interrupt or user context, need to be atomic */ @@ -47,12 +45,11 @@ probe_workqueue_insertion(struct task_struct *wq_thread, struct work_struct *work) { int cpu = cpumask_first(&wq_thread->cpus_allowed); - struct cpu_workqueue_stats *node, *next; + struct cpu_workqueue_stats *node; unsigned long flags; spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); - list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, - list) { + list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { atomic_inc(&node->inserted); goto found; @@ -69,12 +66,11 @@ probe_workqueue_execution(struct task_struct *wq_thread, struct work_struct *work) { int cpu = cpumask_first(&wq_thread->cpus_allowed); - struct cpu_workqueue_stats *node, *next; + struct cpu_workqueue_stats *node; unsigned long flags; spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); - list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, - list) { + list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { node->executed++; goto found; @@ -105,8 +101,6 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) cws->pid = wq_thread->pid; spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); - if (list_empty(&workqueue_cpu_stat(cpu)->list)) - cws->first_entry = true; list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list); spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } @@ -152,7 +146,7 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu) return ret; } -static void *workqueue_stat_start(void) +static void *workqueue_stat_start(struct tracer_stat *trace) { int cpu; void *ret = NULL; @@ -191,16 +185,9 @@ static void *workqueue_stat_next(void *prev, int idx) static int workqueue_stat_show(struct seq_file *s, void *p) { struct cpu_workqueue_stats *cws = p; - unsigned long flags; - int cpu = cws->cpu; struct pid *pid; struct task_struct *tsk; - spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); - if (&cws->list == workqueue_cpu_stat(cpu)->list.next) - seq_printf(s, "\n"); - spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); - pid = find_get_pid(cws->pid); if (pid) { tsk = get_pid_task(pid, PIDTYPE_PID); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f71fb2a08950..0668795d8818 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -33,7 +33,8 @@ #include <linux/kallsyms.h> #include <linux/debug_locks.h> #include <linux/lockdep.h> -#include <trace/workqueue.h> +#define CREATE_TRACE_POINTS +#include <trace/events/workqueue.h> /* * The per-CPU workqueue (if single thread, we always use the first @@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); } -DEFINE_TRACE(workqueue_insertion); - static void insert_work(struct cpu_workqueue_struct *cwq, struct work_struct *work, struct list_head *head) { @@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_delayed_work_on); -DEFINE_TRACE(workqueue_execution); - static void run_workqueue(struct cpu_workqueue_struct *cwq) { spin_lock_irq(&cwq->lock); @@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu) return cwq; } -DEFINE_TRACE(workqueue_creation); - static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; @@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, } EXPORT_SYMBOL_GPL(__create_workqueue_key); -DEFINE_TRACE(workqueue_destruction); - static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) { /* diff --git a/lib/Kconfig b/lib/Kconfig index 8ade0a7a91e0..9960be04cbbe 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -10,6 +10,9 @@ menu "Library routines" config BITREVERSE tristate +config RATIONAL + boolean + config GENERIC_FIND_FIRST_BIT bool diff --git a/lib/Makefile b/lib/Makefile index 33a40e40e3ee..1f6edefebffe 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -50,6 +50,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y) endif obj-$(CONFIG_BITREVERSE) += bitrev.o +obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o diff --git a/lib/rational.c b/lib/rational.c new file mode 100644 index 000000000000..b3c099b5478e --- /dev/null +++ b/lib/rational.c @@ -0,0 +1,62 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com> + * + * helper functions when coping with rational numbers + */ + +#include <linux/rational.h> + +/* + * calculate best rational approximation for a given fraction + * taking into account restricted register size, e.g. to find + * appropriate values for a pll with 5 bit denominator and + * 8 bit numerator register fields, trying to set up with a + * frequency ratio of 3.1415, one would say: + * + * rational_best_approximation(31415, 10000, + * (1 << 8) - 1, (1 << 5) - 1, &n, &d); + * + * you may look at given_numerator as a fixed point number, + * with the fractional part size described in given_denominator. + * + * for theoretical background, see: + * http://en.wikipedia.org/wiki/Continued_fraction + */ + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator) +{ + unsigned long n, d, n0, d0, n1, d1; + n = given_numerator; + d = given_denominator; + n0 = d1 = 0; + n1 = d0 = 1; + for (;;) { + unsigned long t, a; + if ((n1 > max_numerator) || (d1 > max_denominator)) { + n1 = n0; + d1 = d0; + break; + } + if (d == 0) + break; + t = d; + a = n / d; + d = n % d; + n = t; + t = n0 + a * n1; + n0 = n1; + n1 = t; + t = d0 + a * d1; + d0 = d1; + d1 = t; + } + *best_numerator = n1; + *best_denominator = d1; +} + +EXPORT_SYMBOL(rational_best_approximation); diff --git a/mm/bounce.c b/mm/bounce.c index e590272fe7a8..65f5e17e411a 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -14,16 +14,15 @@ #include <linux/hash.h> #include <linux/highmem.h> #include <linux/blktrace_api.h> -#include <trace/block.h> #include <asm/tlbflush.h> +#include <trace/events/block.h> + #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 static mempool_t *page_pool, *isa_page_pool; -DEFINE_TRACE(block_bio_bounce); - #ifdef CONFIG_HIGHMEM static __init int init_emergency_pool(void) { diff --git a/mm/mlock.c b/mm/mlock.c index cbe9e0581b75..ac130433c7d3 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -629,52 +629,43 @@ void user_shm_unlock(size_t size, struct user_struct *user) free_uid(user); } -void *alloc_locked_buffer(size_t size) +int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size) { - unsigned long rlim, vm, pgsz; - void *buffer = NULL; + unsigned long lim, vm, pgsz; + int error = -ENOMEM; pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + pgsz; - if (rlim < vm) - goto out; + down_write(&mm->mmap_sem); - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + pgsz; - if (rlim < vm) + lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = mm->total_vm + pgsz; + if (lim < vm) goto out; - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) + lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = mm->locked_vm + pgsz; + if (lim < vm) goto out; - current->mm->total_vm += pgsz; - current->mm->locked_vm += pgsz; + mm->total_vm += pgsz; + mm->locked_vm += pgsz; + error = 0; out: - up_write(¤t->mm->mmap_sem); - return buffer; + up_write(&mm->mmap_sem); + return error; } -void release_locked_buffer(void *buffer, size_t size) +void refund_locked_memory(struct mm_struct *mm, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); - - current->mm->total_vm -= pgsz; - current->mm->locked_vm -= pgsz; - - up_write(¤t->mm->mmap_sem); -} + down_write(&mm->mmap_sem); -void free_locked_buffer(void *buffer, size_t size) -{ - release_locked_buffer(buffer, size); + mm->total_vm -= pgsz; + mm->locked_vm -= pgsz; - kfree(buffer); + up_write(&mm->mmap_sem); } diff --git a/mm/slab.c b/mm/slab.c index 9a90b00d2f91..f85831da9080 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -102,7 +102,7 @@ #include <linux/cpu.h> #include <linux/sysctl.h> #include <linux/module.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> #include <linux/rcupdate.h> #include <linux/string.h> #include <linux/uaccess.h> diff --git a/mm/slob.c b/mm/slob.c index f92e66d558bd..9b1737b0787b 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -66,7 +66,7 @@ #include <linux/module.h> #include <linux/rcupdate.h> #include <linux/list.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> #include <asm/atomic.h> /* diff --git a/mm/slub.c b/mm/slub.c index 65ffda5934b0..5e805a6fe36c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -17,7 +17,7 @@ #include <linux/slab.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> #include <linux/cpu.h> #include <linux/cpuset.h> #include <linux/mempolicy.h> diff --git a/mm/util.c b/mm/util.c index 55bef160b9f1..abc65aa7cdfc 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,9 +4,11 @@ #include <linux/module.h> #include <linux/err.h> #include <linux/sched.h> -#include <linux/tracepoint.h> #include <asm/uaccess.h> +#define CREATE_TRACE_POINTS +#include <trace/events/kmem.h> + /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate @@ -255,13 +257,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, EXPORT_SYMBOL_GPL(get_user_pages_fast); /* Tracepoints definitions. */ -DEFINE_TRACE(kmalloc); -DEFINE_TRACE(kmem_cache_alloc); -DEFINE_TRACE(kmalloc_node); -DEFINE_TRACE(kmem_cache_alloc_node); -DEFINE_TRACE(kfree); -DEFINE_TRACE(kmem_cache_free); - EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 9fd0dc3cca99..b75b6cea49da 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -23,7 +23,7 @@ #include <linux/bitops.h> #include <net/genetlink.h> -#include <trace/skb.h> +#include <trace/events/skb.h> #include <asm/unaligned.h> diff --git a/net/core/net-traces.c b/net/core/net-traces.c index c8fb45665e4f..499a67eaf3ae 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -19,11 +19,11 @@ #include <linux/workqueue.h> #include <linux/netlink.h> #include <linux/net_dropmon.h> -#include <trace/skb.h> #include <asm/unaligned.h> #include <asm/bitops.h> +#define CREATE_TRACE_POINTS +#include <trace/events/skb.h> -DEFINE_TRACE(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e505b5392e1e..c2e4fb8f3546 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,7 +65,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <trace/skb.h> +#include <trace/events/skb.h> #include "kmap_skb.h" diff --git a/samples/Kconfig b/samples/Kconfig index 4b02f5a0e656..b75d28cba3f7 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -19,6 +19,12 @@ config SAMPLE_TRACEPOINTS help This build tracepoints example modules. +config SAMPLE_TRACE_EVENTS + tristate "Build trace_events examples -- loadable modules only" + depends on EVENT_TRACING && m + help + This build trace event example modules. + config SAMPLE_KOBJECT tristate "Build kobject examples" help diff --git a/samples/Makefile b/samples/Makefile index 10eaca89fe17..13e4b470b539 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ +obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile new file mode 100644 index 000000000000..0d428dc67283 --- /dev/null +++ b/samples/trace_events/Makefile @@ -0,0 +1,6 @@ +# builds the trace events example kernel modules; +# then to use one (as root): insmod <module_name.ko> + +CFLAGS_trace-events-sample.o := -I$(src) + +obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c new file mode 100644 index 000000000000..aabc4e970911 --- /dev/null +++ b/samples/trace_events/trace-events-sample.c @@ -0,0 +1,52 @@ +#include <linux/module.h> +#include <linux/kthread.h> + +/* + * Any file that uses trace points, must include the header. + * But only one file, must include the header by defining + * CREATE_TRACE_POINTS first. This will make the C code that + * creates the handles for the trace points. + */ +#define CREATE_TRACE_POINTS +#include "trace-events-sample.h" + + +static void simple_thread_func(int cnt) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + trace_foo_bar("hello", cnt); +} + +static int simple_thread(void *arg) +{ + int cnt = 0; + + while (!kthread_should_stop()) + simple_thread_func(cnt++); + + return 0; +} + +static struct task_struct *simple_tsk; + +static int __init trace_event_init(void) +{ + simple_tsk = kthread_run(simple_thread, NULL, "event-sample"); + if (IS_ERR(simple_tsk)) + return -1; + + return 0; +} + +static void __exit trace_event_exit(void) +{ + kthread_stop(simple_tsk); +} + +module_init(trace_event_init); +module_exit(trace_event_exit); + +MODULE_AUTHOR("Steven Rostedt"); +MODULE_DESCRIPTION("trace-events-sample"); +MODULE_LICENSE("GPL"); diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h new file mode 100644 index 000000000000..128a897687c5 --- /dev/null +++ b/samples/trace_events/trace-events-sample.h @@ -0,0 +1,129 @@ +/* + * Notice that this file is not protected like a normal header. + * We also must allow for rereading of this file. The + * + * || defined(TRACE_HEADER_MULTI_READ) + * + * serves this purpose. + */ +#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EVENT_SAMPLE_H + +/* + * All trace headers should include tracepoint.h, until we finally + * make it into a standard header. + */ +#include <linux/tracepoint.h> + +/* + * If TRACE_SYSTEM is defined, that will be the directory created + * in the ftrace directory under /debugfs/tracing/events/<system> + * + * The define_trace.h belowe will also look for a file name of + * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here. + * + * If you want a different system than file name, you can override + * the header name by defining TRACE_INCLUDE_FILE + * + * If this file was called, goofy.h, then we would define: + * + * #define TRACE_INCLUDE_FILE goofy + * + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sample + +/* + * The TRACE_EVENT macro is broken up into 5 parts. + * + * name: name of the trace point. This is also how to enable the tracepoint. + * A function called trace_foo_bar() will be created. + * + * proto: the prototype of the function trace_foo_bar() + * Here it is trace_foo_bar(char *foo, int bar). + * + * args: must match the arguments in the prototype. + * Here it is simply "foo, bar". + * + * struct: This defines the way the data will be stored in the ring buffer. + * There are currently two types of elements. __field and __array. + * a __field is broken up into (type, name). Where type can be any + * type but an array. + * For an array. there are three fields. (type, name, size). The + * type of elements in the array, the name of the field and the size + * of the array. + * + * __array( char, foo, 10) is the same as saying char foo[10]. + * + * fast_assign: This is a C like function that is used to store the items + * into the ring buffer. + * + * printk: This is a way to print out the data in pretty print. This is + * useful if the system crashes and you are logging via a serial line, + * the data can be printed to the console using this "printk" method. + * + * Note, that for both the assign and the printk, __entry is the handler + * to the data structure in the ring buffer, and is defined by the + * TP_STRUCT__entry. + */ +TRACE_EVENT(foo_bar, + + TP_PROTO(char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_STRUCT__entry( + __array( char, foo, 10 ) + __field( int, bar ) + ), + + TP_fast_assign( + strncpy(__entry->foo, foo, 10); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __entry->foo, __entry->bar) +); +#endif + +/***** NOTICE! The #if protection ends here. *****/ + + +/* + * There are several ways I could have done this. If I left out the + * TRACE_INCLUDE_PATH, then it would default to the kernel source + * include/trace/events directory. + * + * I could specify a path from the define_trace.h file back to this + * file. + * + * #define TRACE_INCLUDE_PATH ../../samples/trace_events + * + * But I chose to simply make it use the current directory and then in + * the Makefile I added: + * + * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/ + * + * This will make sure the current path is part of the include + * structure for our file so that we can find it. + * + * I could have made only the top level directory the include: + * + * CFLAGS_trace-events-sample.o := -I$(PWD) + * + * And then let the path to this directory be the TRACE_INCLUDE_PATH: + * + * #define TRACE_INCLUDE_PATH samples/trace_events + * + * But then if something defines "samples" or "trace_events" then we + * could risk that being converted too, and give us an unexpected + * result. + */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +/* + * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal + */ +#define TRACE_INCLUDE_FILE trace-events-sample +#include <trace/define_trace.h> diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 3208a3a7e7fe..acd8c4a8e3e0 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1828,6 +1828,25 @@ sub reset_state { $state = 0; } +sub tracepoint_munge($) { + my $file = shift; + my $tracepointname = 0; + my $tracepointargs = 0; + + if($prototype =~ m/TRACE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if($prototype =~ m/TP_PROTO\((.*?)\)/) { + $tracepointargs = $1; + } + if (($tracepointname eq 0) || ($tracepointargs eq 0)) { + print STDERR "Warning(${file}:$.): Unrecognized tracepoint format: \n". + "$prototype\n"; + } else { + $prototype = "static inline void trace_$tracepointname($tracepointargs)"; + } +} + sub syscall_munge() { my $void = 0; @@ -1882,6 +1901,9 @@ sub process_state3_function($$) { if ($prototype =~ /SYSCALL_DEFINE/) { syscall_munge(); } + if ($prototype =~ /TRACE_EVENT/) { + tracepoint_munge($file); + } dump_function($prototype, $file); reset_state(); } diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 409596eca124..0fae7da0529c 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -26,7 +26,7 @@ # which will also be the location of that section after final link. # e.g. # -# .section ".text.sched" +# .section ".sched.text", "ax" # .globl my_func # my_func: # [...] @@ -39,7 +39,7 @@ # [...] # # Both relocation offsets for the mcounts in the above example will be -# offset from .text.sched. If we make another file called tmp.s with: +# offset from .sched.text. If we make another file called tmp.s with: # # .section __mcount_loc # .quad my_func + 0x5 @@ -51,7 +51,7 @@ # But this gets hard if my_func is not globl (a static function). # In such a case we have: # -# .section ".text.sched" +# .section ".sched.text", "ax" # my_func: # [...] # call mcount (offset: 0x5) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 7039b14e1f73..6bfc7eaebfda 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -85,8 +85,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos) * against concurrent list-extension */ rcu_read_lock(); - qe = list_entry(rcu_dereference(qe->later.next), - struct ima_queue_entry, later); + qe = list_entry_rcu(qe->later.next, + struct ima_queue_entry, later); rcu_read_unlock(); (*pos)++; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 8d3c2a051c7b..f83a80980726 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -735,8 +735,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) return; } - m = list_entry(rcu_dereference(smk_netlbladdr_list.next), - struct smk_netlbladdr, list); + m = list_entry_rcu(smk_netlbladdr_list.next, + struct smk_netlbladdr, list); /* the comparison '>' is a bit hacky, but works */ if (new->smk_mask.s_addr > m->smk_mask.s_addr) { @@ -749,8 +749,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) list_add_rcu(&new->list, &m->list); return; } - m_next = list_entry(rcu_dereference(m->list.next), - struct smk_netlbladdr, list); + m_next = list_entry_rcu(m->list.next, + struct smk_netlbladdr, list); if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) { list_add_rcu(&new->list, &m->list); return; |