diff options
347 files changed, 14739 insertions, 5970 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 63df2262d41a..fb8258ebc577 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -205,20 +205,6 @@ Who: Len Brown <len.brown@intel.com> --------------------------- -What: Compaq touchscreen device emulation -When: Oct 2007 -Files: drivers/input/tsdev.c -Why: The code says it was obsolete when it was written in 2001. - tslib is a userspace library which does anything tsdev can do and - much more besides in userspace where this code belongs. There is no - longer any need for tsdev and applications should have converted to - use tslib by now. - The name "tsdev" is also extremely confusing and lots of people have - it loaded when they don't need/use it. -Who: Richard Purdie <rpurdie@rpsys.net> - ---------------------------- - What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers When: September 2007 Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c323778270ff..085e4a095eaa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1083,6 +1083,13 @@ and is between 256 and 4096 characters. It is defined in the file [NFS] set the maximum lifetime for idmapper cache entries. + nfs.enable_ino64= + [NFS] enable 64-bit inode numbers. + If zero, the NFS client will fake up a 32-bit inode + number for the readdir() and stat() syscalls instead + of returning the full 64-bit number. + The default is to return 64-bit inode numbers. + nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels no387 [BUGS=X86-32] Tells the kernel to use the 387 maths @@ -1883,9 +1890,6 @@ and is between 256 and 4096 characters. It is defined in the file Format: <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq> - tsdev.xres= [TS] Horizontal screen resolution. - tsdev.yres= [TS] Vertical screen resolution. - turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface Format: diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 1da566630831..11340625e363 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -281,6 +281,39 @@ downdelay will be rounded down to the nearest multiple. The default value is 0. +fail_over_mac + + Specifies whether active-backup mode should set all slaves to + the same MAC address (the traditional behavior), or, when + enabled, change the bond's MAC address when changing the + active interface (i.e., fail over the MAC address itself). + + Fail over MAC is useful for devices that cannot ever alter + their MAC address, or for devices that refuse incoming + broadcasts with their own source MAC (which interferes with + the ARP monitor). + + The down side of fail over MAC is that every device on the + network must be updated via gratuitous ARP, vs. just updating + a switch or set of switches (which often takes place for any + traffic, not just ARP traffic, if the switch snoops incoming + traffic to update its tables) for the traditional method. If + the gratuitous ARP is lost, communication may be disrupted. + + When fail over MAC is used in conjuction with the mii monitor, + devices which assert link up prior to being able to actually + transmit and receive are particularly susecptible to loss of + the gratuitous ARP, and an appropriate updelay setting may be + required. + + A value of 0 disables fail over MAC, and is the default. A + value of 1 enables fail over MAC. This option is enabled + automatically if the first slave added cannot change its MAC + address. This option may be modified via sysfs only when no + slaves are present in the bond. + + This option was added in bonding version 3.2.0. + lacp_rate Option specifying the rate in which we'll ask our link partner diff --git a/Documentation/networking/proc_net_tcp.txt b/Documentation/networking/proc_net_tcp.txt index 5e21f7cb6383..4a79209e77a7 100644 --- a/Documentation/networking/proc_net_tcp.txt +++ b/Documentation/networking/proc_net_tcp.txt @@ -1,8 +1,9 @@ This document describes the interfaces /proc/net/tcp and /proc/net/tcp6. +Note that these interfaces are deprecated in favor of tcp_diag. These /proc interfaces provide information about currently active TCP -connections, and are implemented by tcp_get_info() in net/ipv4/tcp_ipv4.c and -tcp6_get_info() in net/ipv6/tcp_ipv6.c, respectively. +connections, and are implemented by tcp4_seq_show() in net/ipv4/tcp_ipv4.c +and tcp6_seq_show() in net/ipv6/tcp_ipv6.c, respectively. It will first list all listening TCP sockets, and next list all established TCP connections. A typical entry of /proc/net/tcp would look like this (split diff --git a/MAINTAINERS b/MAINTAINERS index 12cee3da2625..c7355e7f09ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2404,6 +2404,15 @@ M: khali@linux-fr.org L: lm-sensors@lm-sensors.org S: Maintained +LOCKDEP AND LOCKSTAT +P: Peter Zijlstra +M: peterz@infradead.org +P: Ingo Molnar +M: mingo@redhat.com +L: linux-kernel@vger.kernel.org +T: git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-lockdep.git +S: Maintained + LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks) P: Richard Russon (FlatCap) M: ldm@flatcap.org diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c index 2c47db494f7d..046e6d84bbfc 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -88,7 +88,7 @@ static struct platform_device bf54x_lq043_device = { #endif #if defined(CONFIG_KEYBOARD_BFIN) || defined(CONFIG_KEYBOARD_BFIN_MODULE) -static int bf548_keymap[] = { +static const unsigned int bf548_keymap[] = { KEYVAL(0, 0, KEY_ENTER), KEYVAL(0, 1, KEY_HELP), KEYVAL(0, 2, KEY_0), @@ -110,8 +110,8 @@ static int bf548_keymap[] = { static struct bfin_kpad_platform_data bf54x_kpad_data = { .rows = 4, .cols = 4, - .keymap = bf548_keymap, - .keymapsize = ARRAY_SIZE(bf548_keymap), + .keymap = bf548_keymap, + .keymapsize = ARRAY_SIZE(bf548_keymap), .repeat = 0, .debounce_time = 5000, /* ns (5ms) */ .coldrive_time = 1000, /* ns (1ms) */ diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c index fbbccb5e7511..880add120eb3 100644 --- a/arch/m68k/atari/atakeyb.c +++ b/arch/m68k/atari/atakeyb.c @@ -1,6 +1,4 @@ /* - * linux/arch/m68k/atari/atakeyb.c - * * Atari Keyboard driver for 680x0 Linux * * This file is subject to the terms and conditions of the GNU General Public diff --git a/arch/mips/au1000/common/prom.c b/arch/mips/au1000/common/prom.c index a8637cdb5b4b..90d70695aa60 100644 --- a/arch/mips/au1000/common/prom.c +++ b/arch/mips/au1000/common/prom.c @@ -33,7 +33,6 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ - #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -41,18 +40,16 @@ #include <asm/bootinfo.h> -/* #define DEBUG_CMDLINE */ - -extern int prom_argc; -extern char **prom_argv, **prom_envp; - +int prom_argc; +char **prom_argv; +char **prom_envp; char * __init_or_module prom_getcmdline(void) { return &(arcs_cmdline[0]); } -void prom_init_cmdline(void) +void prom_init_cmdline(void) { char *cp; int actr; @@ -61,7 +58,7 @@ void prom_init_cmdline(void) cp = &(arcs_cmdline[0]); while(actr < prom_argc) { - strcpy(cp, prom_argv[actr]); + strcpy(cp, prom_argv[actr]); cp += strlen(prom_argv[actr]); *cp++ = ' '; actr++; @@ -70,10 +67,8 @@ void prom_init_cmdline(void) --cp; if (prom_argc > 1) *cp = '\0'; - } - char *prom_getenv(char *envname) { /* @@ -95,21 +90,23 @@ char *prom_getenv(char *envname) } env++; } + return NULL; } -inline unsigned char str2hexnum(unsigned char c) +static inline unsigned char str2hexnum(unsigned char c) { - if(c >= '0' && c <= '9') + if (c >= '0' && c <= '9') return c - '0'; - if(c >= 'a' && c <= 'f') + if (c >= 'a' && c <= 'f') return c - 'a' + 10; - if(c >= 'A' && c <= 'F') + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return 0; /* foo */ } -inline void str2eaddr(unsigned char *ea, unsigned char *str) +static inline void str2eaddr(unsigned char *ea, unsigned char *str) { int i; @@ -124,35 +121,29 @@ inline void str2eaddr(unsigned char *ea, unsigned char *str) } } -int get_ethernet_addr(char *ethernet_addr) +int prom_get_ethernet_addr(char *ethernet_addr) { - char *ethaddr_str; + char *ethaddr_str; + char *argptr; - ethaddr_str = prom_getenv("ethaddr"); + /* Check the environment variables first */ + ethaddr_str = prom_getenv("ethaddr"); if (!ethaddr_str) { - printk("ethaddr not set in boot prom\n"); - return -1; - } - str2eaddr(ethernet_addr, ethaddr_str); - -#if 0 - { - int i; + /* Check command line */ + argptr = prom_getcmdline(); + ethaddr_str = strstr(argptr, "ethaddr="); + if (!ethaddr_str) + return -1; - printk("get_ethernet_addr: "); - for (i=0; i<5; i++) - printk("%02x:", (unsigned char)*(ethernet_addr+i)); - printk("%02x\n", *(ethernet_addr+i)); + ethaddr_str += strlen("ethaddr="); } -#endif + + str2eaddr(ethernet_addr, ethaddr_str); return 0; } +EXPORT_SYMBOL(prom_get_ethernet_addr); void __init prom_free_prom_memory(void) { } - -EXPORT_SYMBOL(prom_getcmdline); -EXPORT_SYMBOL(get_ethernet_addr); -EXPORT_SYMBOL(str2eaddr); diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c index b212c0726125..a90d425d4651 100644 --- a/arch/mips/au1000/common/setup.c +++ b/arch/mips/au1000/common/setup.c @@ -40,10 +40,11 @@ #include <asm/mipsregs.h> #include <asm/reboot.h> #include <asm/pgtable.h> -#include <asm/mach-au1x00/au1000.h> #include <asm/time.h> -extern char * prom_getcmdline(void); +#include <au1000.h> +#include <prom.h> + extern void __init board_setup(void); extern void au1000_restart(char *); extern void au1000_halt(void); diff --git a/arch/mips/au1000/db1x00/init.c b/arch/mips/au1000/db1x00/init.c index 4d7bcfc8cf73..43298fd9459c 100644 --- a/arch/mips/au1000/db1x00/init.c +++ b/arch/mips/au1000/db1x00/init.c @@ -31,15 +31,13 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/bootmem.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> #include <linux/string.h> #include <linux/kernel.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <asm/addrspace.h> +#include <asm/bootinfo.h> + +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/mips/au1000/mtx-1/init.c b/arch/mips/au1000/mtx-1/init.c index 2aa7b2ed6a8c..cdeae3212a2d 100644 --- a/arch/mips/au1000/mtx-1/init.c +++ b/arch/mips/au1000/mtx-1/init.c @@ -34,13 +34,11 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/bootmem.h> + #include <asm/addrspace.h> #include <asm/bootinfo.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/mips/au1000/pb1000/init.c b/arch/mips/au1000/pb1000/init.c index 4535f7208e18..ddccaf6997d0 100644 --- a/arch/mips/au1000/pb1000/init.c +++ b/arch/mips/au1000/pb1000/init.c @@ -30,15 +30,13 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/bootmem.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> #include <linux/string.h> #include <linux/kernel.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <asm/addrspace.h> +#include <asm/bootinfo.h> + +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/mips/au1000/pb1100/init.c b/arch/mips/au1000/pb1100/init.c index 7ba6852de7cd..c93fd39b4aba 100644 --- a/arch/mips/au1000/pb1100/init.c +++ b/arch/mips/au1000/pb1100/init.c @@ -31,15 +31,13 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/bootmem.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> #include <linux/string.h> #include <linux/kernel.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <asm/addrspace.h> +#include <asm/bootinfo.h> + +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/mips/au1000/pb1200/board_setup.c b/arch/mips/au1000/pb1200/board_setup.c index 2122515f79d7..5dbc9868f598 100644 --- a/arch/mips/au1000/pb1200/board_setup.c +++ b/arch/mips/au1000/pb1200/board_setup.c @@ -41,8 +41,10 @@ #include <asm/mipsregs.h> #include <asm/reboot.h> #include <asm/pgtable.h> -#include <asm/mach-au1x00/au1000.h> -#include <asm/mach-au1x00/au1xxx_dbdma.h> + +#include <au1000.h> +#include <au1xxx_dbdma.h> +#include <prom.h> #ifdef CONFIG_MIPS_PB1200 #include <asm/mach-pb1x00/pb1200.h> diff --git a/arch/mips/au1000/pb1200/init.c b/arch/mips/au1000/pb1200/init.c index 5a70029d5388..c251570749ee 100644 --- a/arch/mips/au1000/pb1200/init.c +++ b/arch/mips/au1000/pb1200/init.c @@ -31,15 +31,13 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/bootmem.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> #include <linux/string.h> #include <linux/kernel.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <asm/addrspace.h> +#include <asm/bootinfo.h> + +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/mips/au1000/pb1500/init.c b/arch/mips/au1000/pb1500/init.c index e58a9d6c5021..507d4b204161 100644 --- a/arch/mips/au1000/pb1500/init.c +++ b/arch/mips/au1000/pb1500/init.c @@ -31,15 +31,13 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/bootmem.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> #include <linux/string.h> #include <linux/kernel.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <asm/addrspace.h> +#include <asm/bootinfo.h> + +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/mips/au1000/pb1550/init.c b/arch/mips/au1000/pb1550/init.c index fad53bf5aad1..b03eee601e36 100644 --- a/arch/mips/au1000/pb1550/init.c +++ b/arch/mips/au1000/pb1550/init.c @@ -31,15 +31,13 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/bootmem.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> #include <linux/string.h> #include <linux/kernel.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <asm/addrspace.h> +#include <asm/bootinfo.h> + +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/mips/au1000/xxs1500/init.c b/arch/mips/au1000/xxs1500/init.c index 9f839c36f69e..6532939f377a 100644 --- a/arch/mips/au1000/xxs1500/init.c +++ b/arch/mips/au1000/xxs1500/init.c @@ -30,15 +30,13 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/bootmem.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> #include <linux/string.h> #include <linux/kernel.h> -int prom_argc; -char **prom_argv, **prom_envp; -extern void __init prom_init_cmdline(void); -extern char *prom_getenv(char *envname); +#include <asm/addrspace.h> +#include <asm/bootinfo.h> + +#include <prom.h> const char *get_system_type(void) { diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 1245b2f517bb..095988f13bf4 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -77,12 +77,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) { pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); - dcr_write(msic->dcr_host, msic->dcr_host.base + dcr_n, val); -} - -static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n) -{ - return dcr_read(msic->dcr_host, msic->dcr_host.base + dcr_n); + dcr_write(msic->dcr_host, dcr_n, val); } static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) @@ -91,7 +86,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 write_offset, msi; int idx; - write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG); + write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG); pr_debug("axon_msi: original write_offset 0x%x\n", write_offset); /* write_offset doesn't wrap properly, so we have to mask it */ @@ -306,7 +301,7 @@ static int axon_msi_notify_reboot(struct notifier_block *nb, list_for_each_entry(msic, &axon_msic_list, list) { pr_debug("axon_msi: disabling %s\n", msic->irq_host->of_node->full_name); - tmp = msic_dcr_read(msic, MSIC_CTRL_REG); + tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; msic_dcr_write(msic, MSIC_CTRL_REG, tmp); } diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index ab11c0b29024..427027c7ea0f 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -126,13 +126,13 @@ dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n, } EXPORT_SYMBOL_GPL(dcr_map); -void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c) +void dcr_unmap(dcr_host_t host, unsigned int dcr_c) { dcr_host_t h = host; if (h.token == NULL) return; - h.token += dcr_n * h.stride; + h.token += host.base * h.stride; iounmap(h.token); h.token = NULL; } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 893e65439e85..e47938899a92 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -156,7 +156,7 @@ static inline u32 _mpic_read(enum mpic_reg_type type, switch(type) { #ifdef CONFIG_PPC_DCR case mpic_access_dcr: - return dcr_read(rb->dhost, rb->dhost.base + reg); + return dcr_read(rb->dhost, reg); #endif case mpic_access_mmio_be: return in_be32(rb->base + (reg >> 2)); @@ -173,7 +173,7 @@ static inline void _mpic_write(enum mpic_reg_type type, switch(type) { #ifdef CONFIG_PPC_DCR case mpic_access_dcr: - return dcr_write(rb->dhost, rb->dhost.base + reg, value); + return dcr_write(rb->dhost, reg, value); #endif case mpic_access_mmio_be: return out_be32(rb->base + (reg >> 2), value); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f3bceb165321..139ca153d5cc 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -68,9 +68,15 @@ STACK_SIZE = 1 << STACK_SHIFT l %r1,BASED(.Ltrace_irq_off) basr %r14,%r1 .endm + + .macro LOCKDEP_SYS_EXIT + l %r1,BASED(.Llockdep_sys_exit) + basr %r14,%r1 + .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF +#define LOCKDEP_SYS_EXIT #endif /* @@ -260,6 +266,7 @@ sysc_return: bno BASED(sysc_leave) tm __TI_flags+3(%r9),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) + LOCKDEP_SYS_EXIT sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 @@ -283,6 +290,7 @@ sysc_work: bo BASED(sysc_restart) tm __TI_flags+3(%r9),_TIF_SINGLE_STEP bo BASED(sysc_singlestep) + LOCKDEP_SYS_EXIT b BASED(sysc_leave) # @@ -572,6 +580,7 @@ io_return: #endif tm __TI_flags+3(%r9),_TIF_WORK_INT bnz BASED(io_work) # there is work to do (signals etc.) + LOCKDEP_SYS_EXIT io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: @@ -618,6 +627,7 @@ io_work_loop: bo BASED(io_reschedule) tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) bnz BASED(io_sigpending) + LOCKDEP_SYS_EXIT b BASED(io_leave) # @@ -1040,6 +1050,8 @@ cleanup_io_leave_insn: .Ltrace_irq_on: .long trace_hardirqs_on .Ltrace_irq_off: .long trace_hardirqs_off +.Llockdep_sys_exit: + .long lockdep_sys_exit #endif .Lcritical_start: .long __critical_start + 0x80000000 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9c0d5cc8269d..05e26d1fdf40 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -66,9 +66,14 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ .macro TRACE_IRQS_OFF brasl %r14,trace_hardirqs_off .endm + + .macro LOCKDEP_SYS_EXIT + brasl %r14,lockdep_sys_exit + .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF +#define LOCKDEP_SYS_EXIT #endif .macro STORE_TIMER lc_offset @@ -255,6 +260,7 @@ sysc_return: jno sysc_leave tm __TI_flags+7(%r9),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) + LOCKDEP_SYS_EXIT sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 @@ -278,6 +284,7 @@ sysc_work: jo sysc_restart tm __TI_flags+7(%r9),_TIF_SINGLE_STEP jo sysc_singlestep + LOCKDEP_SYS_EXIT j sysc_leave # @@ -558,6 +565,7 @@ io_return: #endif tm __TI_flags+7(%r9),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) + LOCKDEP_SYS_EXIT io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: @@ -605,6 +613,7 @@ io_work_loop: jo io_reschedule tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) jnz io_sigpending + LOCKDEP_SYS_EXIT j io_leave # diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 290b7bc82da3..8099fea0a72f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -251,6 +251,7 @@ check_userspace: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -338,6 +339,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx @@ -377,6 +379,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) # store the return value syscall_exit: + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -467,6 +470,7 @@ work_pending: jz work_notifysig work_resched: call schedule + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1d232e5f5658..f1cacd4897f7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -244,6 +244,7 @@ ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ sysret_check: + LOCKDEP_SYS_EXIT GET_THREAD_INFO(%rcx) cli TRACE_IRQS_OFF @@ -333,6 +334,7 @@ int_ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ int_with_check: + LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) movl threadinfo_flags(%rcx),%edx andl %edi,%edx @@ -544,11 +546,13 @@ exit_intr: retint_with_reschedule: movl $_TIF_WORK_MASK,%edi retint_check: + LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful -retint_swapgs: + +retint_swapgs: /* return to user-space */ /* * The iretq could re-enable interrupts: */ @@ -557,7 +561,7 @@ retint_swapgs: swapgs jmp restore_args -retint_restore_args: +retint_restore_args: /* return to kernel space */ cli /* * The iretq could re-enable interrupts: @@ -866,26 +870,21 @@ error_sti: movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) call *%rax - /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ -error_exit: - movl %ebx,%eax + /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ +error_exit: + movl %ebx,%eax RESTORE_REST cli TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax jne retint_kernel + LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful - /* - * The iret might restore flags: - */ - TRACE_IRQS_IRETQ - swapgs - RESTORE_ARGS 0,8,0 - jmp iret_label + jmp retint_swapgs CFI_ENDPROC error_kernelspace: diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c index c2d03e96ae9f..e7d0d3c2ef64 100644 --- a/arch/x86/kernel/kprobes_32.c +++ b/arch/x86/kernel/kprobes_32.c @@ -557,6 +557,12 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_eflags; +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + if (raw_irqs_disabled_flags(regs->eflags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +#endif /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -694,6 +700,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; + trace_hardirqs_off(); regs->eip = (unsigned long)(jp->entry); return 1; } diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c index 1df17a0ec0c9..62e28e52d784 100644 --- a/arch/x86/kernel/kprobes_64.c +++ b/arch/x86/kernel/kprobes_64.c @@ -544,6 +544,12 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_rflags; +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + if (raw_irqs_disabled_flags(regs->eflags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +#endif /* Restore the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -684,6 +690,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; + trace_hardirqs_off(); regs->rip = (unsigned long)(jp->entry); return 1; } diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index 55e586d352d3..6ea73f3de567 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -50,6 +50,10 @@ thunk trace_hardirqs_on_thunk,trace_hardirqs_on thunk trace_hardirqs_off_thunk,trace_hardirqs_off #endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + thunk lockdep_sys_exit_thunk,lockdep_sys_exit +#endif /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 4672066167e3..33f5eb038773 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -272,6 +272,15 @@ config PATA_CS5535 If unsure, say N. +config PATA_CS5536 + tristate "CS5536 PATA support (Experimental)" + depends on PCI && X86 && !X86_64 && EXPERIMENTAL + help + This option enables support for the AMD CS5536 + companion chip used with the Geode LX processor family. + + If unsure, say N. + config PATA_CYPRESS tristate "Cypress CY82C693 PATA support (Very Experimental)" depends on PCI && EXPERIMENTAL diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 2a63645003eb..6bdc307649e6 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_PATA_CMD64X) += pata_cmd64x.o obj-$(CONFIG_PATA_CS5520) += pata_cs5520.o obj-$(CONFIG_PATA_CS5530) += pata_cs5530.o obj-$(CONFIG_PATA_CS5535) += pata_cs5535.o +obj-$(CONFIG_PATA_CS5536) += pata_cs5536.o obj-$(CONFIG_PATA_CYPRESS) += pata_cypress.o obj-$(CONFIG_PATA_EFAR) += pata_efar.o obj-$(CONFIG_PATA_HPT366) += pata_hpt366.o diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 9ce4aa9c2f25..3c6f43e381f4 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -130,6 +130,7 @@ enum { ich8_sata_ahci = 9, piix_pata_mwdma = 10, /* PIIX3 MWDMA only */ tolapai_sata_ahci = 11, + ich9_2port_sata = 12, /* constants for mapping table */ P0 = 0, /* port 0 */ @@ -238,19 +239,19 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller 1 IDE (ICH8) */ { 0x8086, 0x2820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller 2 IDE (ICH8) */ - { 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, + { 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, /* Mobile SATA Controller IDE (ICH8M) */ { 0x8086, 0x2828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (ICH9) */ { 0x8086, 0x2920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (ICH9) */ - { 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, + { 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, /* SATA Controller IDE (ICH9) */ - { 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, + { 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, /* SATA Controller IDE (ICH9M) */ - { 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, + { 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, /* SATA Controller IDE (ICH9M) */ - { 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, + { 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, /* SATA Controller IDE (ICH9M) */ { 0x8086, 0x292e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (Tolapai) */ @@ -448,6 +449,18 @@ static const struct piix_map_db tolapai_map_db = { }, }; +static const struct piix_map_db ich9_2port_map_db = { + .mask = 0x3, + .port_enable = 0x3, + .map = { + /* PM PS SM SS MAP */ + { P0, NA, P1, NA }, /* 00b */ + { RV, RV, RV, RV }, /* 01b */ + { RV, RV, RV, RV }, /* 10b */ + { RV, RV, RV, RV }, + }, +}; + static const struct piix_map_db *piix_map_db_table[] = { [ich5_sata] = &ich5_map_db, [ich6_sata] = &ich6_map_db, @@ -455,6 +468,7 @@ static const struct piix_map_db *piix_map_db_table[] = { [ich6m_sata_ahci] = &ich6m_map_db, [ich8_sata_ahci] = &ich8_map_db, [tolapai_sata_ahci] = &tolapai_map_db, + [ich9_2port_sata] = &ich9_2port_map_db, }; static struct ata_port_info piix_port_info[] = { @@ -570,6 +584,17 @@ static struct ata_port_info piix_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &piix_sata_ops, }, + + [ich9_2port_sata] = + { + .sht = &piix_sht, + .flags = PIIX_SATA_FLAGS | PIIX_FLAG_SCR | + PIIX_FLAG_AHCI, + .pio_mask = 0x1f, /* pio0-4 */ + .mwdma_mask = 0x07, /* mwdma0-2 */ + .udma_mask = ATA_UDMA6, + .port_ops = &piix_sata_ops, + }, }; static struct pci_bits piix_enable_bits[] = { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b05384a8c326..68699b3e7998 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3984,6 +3984,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST9120822AS", "3.CLF", ATA_HORKAGE_NONCQ, }, { "ST9160821AS", "3.CLF", ATA_HORKAGE_NONCQ, }, { "ST9160821AS", "3.ALD", ATA_HORKAGE_NONCQ, }, + { "ST9160821AS", "3.CCD", ATA_HORKAGE_NONCQ, }, { "ST3160812AS", "3.ADJ", ATA_HORKAGE_NONCQ, }, { "ST980813AS", "3.ADB", ATA_HORKAGE_NONCQ, }, { "SAMSUNG HD401LJ", "ZZ100-15", ATA_HORKAGE_NONCQ, }, @@ -4013,8 +4014,14 @@ int strn_pattern_cmp(const char *patt, const char *name, int wildchar) p = strchr(patt, wildchar); if (p && ((*(p + 1)) == 0)) len = p - patt; - else + else { len = strlen(name); + if (!len) { + if (!*patt) + return 0; + return -1; + } + } return strncmp(patt, name, len); } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index ea53e6a570b4..d63c81ed084f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1363,6 +1363,7 @@ nothing_to_do: static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_eh_info *ehi = &qc->dev->link->eh_info; struct scsi_cmnd *cmd = qc->scsicmd; u8 *cdb = cmd->cmnd; int need_sense = (qc->err_mask != 0); @@ -1376,14 +1377,14 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) case ATA_CMD_SET_FEATURES: if ((qc->tf.feature == SETFEATURES_WC_ON) || (qc->tf.feature == SETFEATURES_WC_OFF)) { - ap->link.eh_info.action |= ATA_EH_REVALIDATE; + ehi->action |= ATA_EH_REVALIDATE; ata_port_schedule_eh(ap); } break; case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */ case ATA_CMD_SET_MULTI: /* multi_count changed */ - ap->link.eh_info.action |= ATA_EH_REVALIDATE; + ehi->action |= ATA_EH_REVALIDATE; ata_port_schedule_eh(ap); break; } diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c new file mode 100644 index 000000000000..53070f6b1fc4 --- /dev/null +++ b/drivers/ata/pata_cs5536.c @@ -0,0 +1,344 @@ +/* + * pata_cs5536.c - CS5536 PATA for new ATA layer + * (C) 2007 Martin K. Petersen <mkp@mkp.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Documentation: + * Available from AMD web site. + * + * The IDE timing registers for the CS5536 live in the Geode Machine + * Specific Register file and not PCI config space. Most BIOSes + * virtualize the PCI registers so the chip looks like a standard IDE + * controller. Unfortunately not all implementations get this right. + * In particular some have problems with unaligned accesses to the + * virtualized PCI registers. This driver always does full dword + * writes to work around the issue. Also, in case of a bad BIOS this + * driver can be loaded with the "msr=1" parameter which forces using + * the Machine Specific Registers to configure the device. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/delay.h> +#include <linux/libata.h> +#include <scsi/scsi_host.h> +#include <asm/msr.h> + +#define DRV_NAME "pata_cs5536" +#define DRV_VERSION "0.0.5" + +enum { + CFG = 0, + DTC = 1, + CAST = 2, + ETC = 3, + + MSR_IDE_BASE = 0x51300000, + MSR_IDE_CFG = (MSR_IDE_BASE + 0x10), + MSR_IDE_DTC = (MSR_IDE_BASE + 0x12), + MSR_IDE_CAST = (MSR_IDE_BASE + 0x13), + MSR_IDE_ETC = (MSR_IDE_BASE + 0x14), + + PCI_IDE_CFG = 0x40, + PCI_IDE_DTC = 0x48, + PCI_IDE_CAST = 0x4c, + PCI_IDE_ETC = 0x50, + + IDE_CFG_CHANEN = 0x2, + IDE_CFG_CABLE = 0x10000, + + IDE_D0_SHIFT = 24, + IDE_D1_SHIFT = 16, + IDE_DRV_MASK = 0xff, + + IDE_CAST_D0_SHIFT = 6, + IDE_CAST_D1_SHIFT = 4, + IDE_CAST_DRV_MASK = 0x3, + IDE_CAST_CMD_MASK = 0xff, + IDE_CAST_CMD_SHIFT = 24, + + IDE_ETC_NODMA = 0x03, +}; + +static int use_msr; + +static const u32 msr_reg[4] = { + MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC, +}; + +static const u8 pci_reg[4] = { + PCI_IDE_CFG, PCI_IDE_DTC, PCI_IDE_CAST, PCI_IDE_ETC, +}; + +static inline int cs5536_read(struct pci_dev *pdev, int reg, int *val) +{ + if (unlikely(use_msr)) { + u32 dummy; + + rdmsr(msr_reg[reg], *val, dummy); + return 0; + } + + return pci_read_config_dword(pdev, pci_reg[reg], val); +} + +static inline int cs5536_write(struct pci_dev *pdev, int reg, int val) +{ + if (unlikely(use_msr)) { + wrmsr(msr_reg[reg], val, 0); + return 0; + } + + return pci_write_config_dword(pdev, pci_reg[reg], val); +} + +/** + * cs5536_cable_detect - detect cable type + * @ap: Port to detect on + * @deadline: deadline jiffies for the operation + * + * Perform cable detection for ATA66 capable cable. Return a libata + * cable type. + */ + +static int cs5536_cable_detect(struct ata_port *ap) +{ + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 cfg; + + cs5536_read(pdev, CFG, &cfg); + + if (cfg & (IDE_CFG_CABLE << ap->port_no)) + return ATA_CBL_PATA80; + else + return ATA_CBL_PATA40; +} + +/** + * cs5536_set_piomode - PIO setup + * @ap: ATA interface + * @adev: device on the interface + */ + +static void cs5536_set_piomode(struct ata_port *ap, struct ata_device *adev) +{ + static const u8 drv_timings[5] = { + 0x98, 0x55, 0x32, 0x21, 0x20, + }; + + static const u8 addr_timings[5] = { + 0x2, 0x1, 0x0, 0x0, 0x0, + }; + + static const u8 cmd_timings[5] = { + 0x99, 0x92, 0x90, 0x22, 0x20, + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct ata_device *pair = ata_dev_pair(adev); + int mode = adev->pio_mode - XFER_PIO_0; + int cmdmode = mode; + int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT; + int cshift = ap->port_no ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT; + u32 dtc, cast, etc; + + if (pair) + cmdmode = min(mode, pair->pio_mode - XFER_PIO_0); + + cs5536_read(pdev, DTC, &dtc); + cs5536_read(pdev, CAST, &cast); + cs5536_read(pdev, ETC, &etc); + + dtc &= ~(IDE_DRV_MASK << dshift); + dtc |= drv_timings[mode] << dshift; + + cast &= ~(IDE_CAST_DRV_MASK << cshift); + cast |= addr_timings[mode] << cshift; + + cast &= ~(IDE_CAST_CMD_MASK << IDE_CAST_CMD_SHIFT); + cast |= cmd_timings[cmdmode] << IDE_CAST_CMD_SHIFT; + + etc &= ~(IDE_DRV_MASK << dshift); + etc |= IDE_ETC_NODMA << dshift; + + cs5536_write(pdev, DTC, dtc); + cs5536_write(pdev, CAST, cast); + cs5536_write(pdev, ETC, etc); +} + +/** + * cs5536_set_dmamode - DMA timing setup + * @ap: ATA interface + * @adev: Device being configured + * + */ + +static void cs5536_set_dmamode(struct ata_port *ap, struct ata_device *adev) +{ + static const u8 udma_timings[6] = { + 0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6, + }; + + static const u8 mwdma_timings[3] = { + 0x67, 0x21, 0x20, + }; + + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + u32 dtc, etc; + int mode = adev->dma_mode; + int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT; + + if (mode >= XFER_UDMA_0) { + cs5536_read(pdev, ETC, &etc); + + etc &= ~(IDE_DRV_MASK << dshift); + etc |= udma_timings[mode - XFER_UDMA_0] << dshift; + + cs5536_write(pdev, ETC, etc); + } else { /* MWDMA */ + cs5536_read(pdev, DTC, &dtc); + + dtc &= ~(IDE_DRV_MASK << dshift); + dtc |= mwdma_timings[mode] << dshift; + + cs5536_write(pdev, DTC, dtc); + } +} + +static struct scsi_host_template cs5536_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .can_queue = ATA_DEF_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = ata_scsi_slave_config, + .slave_destroy = ata_scsi_slave_destroy, + .bios_param = ata_std_bios_param, +}; + +static struct ata_port_operations cs5536_port_ops = { + .set_piomode = cs5536_set_piomode, + .set_dmamode = cs5536_set_dmamode, + .mode_filter = ata_pci_default_filter, + + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .check_status = ata_check_status, + .exec_command = ata_exec_command, + .dev_select = ata_std_dev_select, + + .freeze = ata_bmdma_freeze, + .thaw = ata_bmdma_thaw, + .error_handler = ata_bmdma_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + .cable_detect = cs5536_cable_detect, + + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + + .qc_prep = ata_qc_prep, + .qc_issue = ata_qc_issue_prot, + + .data_xfer = ata_data_xfer, + + .irq_handler = ata_interrupt, + .irq_clear = ata_bmdma_irq_clear, + .irq_on = ata_irq_on, + + .port_start = ata_port_start, +}; + +/** + * cs5536_init_one + * @dev: PCI device + * @id: Entry in match table + * + */ + +static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + static const struct ata_port_info info = { + .sht = &cs5536_sht, + .flags = ATA_FLAG_SLAVE_POSS, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA5, + .port_ops = &cs5536_port_ops, + }; + + const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info }; + u32 cfg; + + if (use_msr) + printk(KERN_ERR DRV_NAME ": Using MSR regs instead of PCI\n"); + + cs5536_read(dev, CFG, &cfg); + + if ((cfg & IDE_CFG_CHANEN) == 0) { + printk(KERN_ERR DRV_NAME ": disabled by BIOS\n"); + return -ENODEV; + } + + return ata_pci_init_one(dev, ppi); +} + +static const struct pci_device_id cs5536[] = { + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_IDE), }, + { }, +}; + +static struct pci_driver cs5536_pci_driver = { + .name = DRV_NAME, + .id_table = cs5536, + .probe = cs5536_init_one, + .remove = ata_pci_remove_one, +#ifdef CONFIG_PM + .suspend = ata_pci_device_suspend, + .resume = ata_pci_device_resume, +#endif +}; + +static int __init cs5536_init(void) +{ + return pci_register_driver(&cs5536_pci_driver); +} + +static void __exit cs5536_exit(void) +{ + pci_unregister_driver(&cs5536_pci_driver); +} + +MODULE_AUTHOR("Martin K. Petersen"); +MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cs5536); +MODULE_VERSION(DRV_VERSION); +module_param_named(msr, use_msr, int, 0644); +MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)"); + +module_init(cs5536_init); +module_exit(cs5536_exit); diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index 782ff4ada9d1..5db2013230b3 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -353,6 +353,7 @@ static void pcmcia_remove_one(struct pcmcia_device *pdev) static struct pcmcia_device_id pcmcia_devices[] = { PCMCIA_DEVICE_FUNC_ID(4), + PCMCIA_DEVICE_MANF_CARD(0x0000, 0x0000), /* Corsair */ PCMCIA_DEVICE_MANF_CARD(0x0007, 0x0000), /* Hitachi */ PCMCIA_DEVICE_MANF_CARD(0x000a, 0x0000), /* I-O Data CFA */ PCMCIA_DEVICE_MANF_CARD(0x001c, 0x0001), /* Mitsubishi CFA */ @@ -378,6 +379,7 @@ static struct pcmcia_device_id pcmcia_devices[] = { PCMCIA_DEVICE_PROD_ID12("EXP ", "CD-ROM", 0x0a5c52fd, 0x66536591), PCMCIA_DEVICE_PROD_ID12("EXP ", "PnPIDE", 0x0a5c52fd, 0x0c694728), PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e), + PCMCIA_DEVICE_PROD_ID12("Hyperstone", "Model1", 0x3d5b9ef5, 0xca6ab420), PCMCIA_DEVICE_PROD_ID12("HITACHI", "FLASH", 0xf4f43949, 0x9eb86aae), PCMCIA_DEVICE_PROD_ID12("HITACHI", "microdrive", 0xf4f43949, 0xa6d76178), PCMCIA_DEVICE_PROD_ID12("IBM", "microdrive", 0xb569a6e5, 0xa6d76178), diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c index 2eb75cd74a96..4dc2e73298fd 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -279,7 +279,7 @@ static struct ata_port_operations sil680_port_ops = { * Returns the final clock settings. */ -static u8 sil680_init_chip(struct pci_dev *pdev) +static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio) { u32 class_rev = 0; u8 tmpbyte = 0; @@ -297,6 +297,8 @@ static u8 sil680_init_chip(struct pci_dev *pdev) dev_dbg(&pdev->dev, "sil680: BA5_EN = %d clock = %02X\n", tmpbyte & 1, tmpbyte & 0x30); + *try_mmio = (tmpbyte & 1) || pci_resource_start(pdev, 5); + switch(tmpbyte & 0x30) { case 0x00: /* 133 clock attempt to force it on */ @@ -361,25 +363,76 @@ static int __devinit sil680_init_one(struct pci_dev *pdev, }; const struct ata_port_info *ppi[] = { &info, NULL }; static int printed_version; + struct ata_host *host; + void __iomem *mmio_base; + int rc, try_mmio; if (!printed_version++) dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); - switch(sil680_init_chip(pdev)) - { + switch (sil680_init_chip(pdev, &try_mmio)) { case 0: ppi[0] = &info_slow; break; case 0x30: return -ENODEV; } + + if (!try_mmio) + goto use_ioports; + + /* Try to acquire MMIO resources and fallback to PIO if + * that fails + */ + rc = pcim_enable_device(pdev); + if (rc) + return rc; + rc = pcim_iomap_regions(pdev, 1 << SIL680_MMIO_BAR, DRV_NAME); + if (rc) + goto use_ioports; + + /* Allocate host and set it up */ + host = ata_host_alloc_pinfo(&pdev->dev, ppi, 2); + if (!host) + return -ENOMEM; + host->iomap = pcim_iomap_table(pdev); + + /* Setup DMA masks */ + rc = pci_set_dma_mask(pdev, ATA_DMA_MASK); + if (rc) + return rc; + rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK); + if (rc) + return rc; + pci_set_master(pdev); + + /* Get MMIO base and initialize port addresses */ + mmio_base = host->iomap[SIL680_MMIO_BAR]; + host->ports[0]->ioaddr.bmdma_addr = mmio_base + 0x00; + host->ports[0]->ioaddr.cmd_addr = mmio_base + 0x80; + host->ports[0]->ioaddr.ctl_addr = mmio_base + 0x8a; + host->ports[0]->ioaddr.altstatus_addr = mmio_base + 0x8a; + ata_std_ports(&host->ports[0]->ioaddr); + host->ports[1]->ioaddr.bmdma_addr = mmio_base + 0x08; + host->ports[1]->ioaddr.cmd_addr = mmio_base + 0xc0; + host->ports[1]->ioaddr.ctl_addr = mmio_base + 0xca; + host->ports[1]->ioaddr.altstatus_addr = mmio_base + 0xca; + ata_std_ports(&host->ports[1]->ioaddr); + + /* Register & activate */ + return ata_host_activate(host, pdev->irq, ata_interrupt, IRQF_SHARED, + &sil680_sht); + +use_ioports: return ata_pci_init_one(pdev, ppi); } #ifdef CONFIG_PM static int sil680_reinit_one(struct pci_dev *pdev) { - sil680_init_chip(pdev); + int try_mmio; + + sil680_init_chip(pdev, &try_mmio); return ata_pci_device_resume(pdev); } #endif diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 40557fe2ffdf..240a8920d0bd 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -169,6 +169,35 @@ enum { NV_ADMA_PORT_REGISTER_MODE = (1 << 0), NV_ADMA_ATAPI_SETUP_COMPLETE = (1 << 1), + /* MCP55 reg offset */ + NV_CTL_MCP55 = 0x400, + NV_INT_STATUS_MCP55 = 0x440, + NV_INT_ENABLE_MCP55 = 0x444, + NV_NCQ_REG_MCP55 = 0x448, + + /* MCP55 */ + NV_INT_ALL_MCP55 = 0xffff, + NV_INT_PORT_SHIFT_MCP55 = 16, /* each port occupies 16 bits */ + NV_INT_MASK_MCP55 = NV_INT_ALL_MCP55 & 0xfffd, + + /* SWNCQ ENABLE BITS*/ + NV_CTL_PRI_SWNCQ = 0x02, + NV_CTL_SEC_SWNCQ = 0x04, + + /* SW NCQ status bits*/ + NV_SWNCQ_IRQ_DEV = (1 << 0), + NV_SWNCQ_IRQ_PM = (1 << 1), + NV_SWNCQ_IRQ_ADDED = (1 << 2), + NV_SWNCQ_IRQ_REMOVED = (1 << 3), + + NV_SWNCQ_IRQ_BACKOUT = (1 << 4), + NV_SWNCQ_IRQ_SDBFIS = (1 << 5), + NV_SWNCQ_IRQ_DHREGFIS = (1 << 6), + NV_SWNCQ_IRQ_DMASETUP = (1 << 7), + + NV_SWNCQ_IRQ_HOTPLUG = NV_SWNCQ_IRQ_ADDED | + NV_SWNCQ_IRQ_REMOVED, + }; /* ADMA Physical Region Descriptor - one SG segment */ @@ -226,6 +255,42 @@ struct nv_host_priv { unsigned long type; }; +struct defer_queue { + u32 defer_bits; + unsigned int head; + unsigned int tail; + unsigned int tag[ATA_MAX_QUEUE]; +}; + +enum ncq_saw_flag_list { + ncq_saw_d2h = (1U << 0), + ncq_saw_dmas = (1U << 1), + ncq_saw_sdb = (1U << 2), + ncq_saw_backout = (1U << 3), +}; + +struct nv_swncq_port_priv { + struct ata_prd *prd; /* our SG list */ + dma_addr_t prd_dma; /* and its DMA mapping */ + void __iomem *sactive_block; + void __iomem *irq_block; + void __iomem *tag_block; + u32 qc_active; + + unsigned int last_issue_tag; + + /* fifo circular queue to store deferral command */ + struct defer_queue defer_queue; + + /* for NCQ interrupt analysis */ + u32 dhfis_bits; + u32 dmafis_bits; + u32 sdbfis_bits; + + unsigned int ncq_flags; +}; + + #define NV_ADMA_CHECK_INTR(GCTL, PORT) ((GCTL) & ( 1 << (19 + (12 * (PORT))))) static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); @@ -263,13 +328,29 @@ static void nv_adma_host_stop(struct ata_host *host); static void nv_adma_post_internal_cmd(struct ata_queued_cmd *qc); static void nv_adma_tf_read(struct ata_port *ap, struct ata_taskfile *tf); +static void nv_mcp55_thaw(struct ata_port *ap); +static void nv_mcp55_freeze(struct ata_port *ap); +static void nv_swncq_error_handler(struct ata_port *ap); +static int nv_swncq_slave_config(struct scsi_device *sdev); +static int nv_swncq_port_start(struct ata_port *ap); +static void nv_swncq_qc_prep(struct ata_queued_cmd *qc); +static void nv_swncq_fill_sg(struct ata_queued_cmd *qc); +static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc); +static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis); +static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance); +#ifdef CONFIG_PM +static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg); +static int nv_swncq_port_resume(struct ata_port *ap); +#endif + enum nv_host_type { GENERIC, NFORCE2, NFORCE3 = NFORCE2, /* NF2 == NF3 as far as sata_nv is concerned */ CK804, - ADMA + ADMA, + SWNCQ, }; static const struct pci_device_id nv_pci_tbl[] = { @@ -280,13 +361,13 @@ static const struct pci_device_id nv_pci_tbl[] = { { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2), CK804 }, { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA), CK804 }, { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2), CK804 }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), GENERIC }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), GENERIC }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), GENERIC }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), GENERIC }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), GENERIC }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), GENERIC }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), GENERIC }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), SWNCQ }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), SWNCQ }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), SWNCQ }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), SWNCQ }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), SWNCQ }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), SWNCQ }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), SWNCQ }, { } /* terminate list */ }; @@ -339,6 +420,25 @@ static struct scsi_host_template nv_adma_sht = { .bios_param = ata_std_bios_param, }; +static struct scsi_host_template nv_swncq_sht = { + .module = THIS_MODULE, + .name = DRV_NAME, + .ioctl = ata_scsi_ioctl, + .queuecommand = ata_scsi_queuecmd, + .change_queue_depth = ata_scsi_change_queue_depth, + .can_queue = ATA_MAX_QUEUE, + .this_id = ATA_SHT_THIS_ID, + .sg_tablesize = LIBATA_MAX_PRD, + .cmd_per_lun = ATA_SHT_CMD_PER_LUN, + .emulated = ATA_SHT_EMULATED, + .use_clustering = ATA_SHT_USE_CLUSTERING, + .proc_name = DRV_NAME, + .dma_boundary = ATA_DMA_BOUNDARY, + .slave_configure = nv_swncq_slave_config, + .slave_destroy = ata_scsi_slave_destroy, + .bios_param = ata_std_bios_param, +}; + static const struct ata_port_operations nv_generic_ops = { .tf_load = ata_tf_load, .tf_read = ata_tf_read, @@ -444,6 +544,35 @@ static const struct ata_port_operations nv_adma_ops = { .host_stop = nv_adma_host_stop, }; +static const struct ata_port_operations nv_swncq_ops = { + .tf_load = ata_tf_load, + .tf_read = ata_tf_read, + .exec_command = ata_exec_command, + .check_status = ata_check_status, + .dev_select = ata_std_dev_select, + .bmdma_setup = ata_bmdma_setup, + .bmdma_start = ata_bmdma_start, + .bmdma_stop = ata_bmdma_stop, + .bmdma_status = ata_bmdma_status, + .qc_defer = ata_std_qc_defer, + .qc_prep = nv_swncq_qc_prep, + .qc_issue = nv_swncq_qc_issue, + .freeze = nv_mcp55_freeze, + .thaw = nv_mcp55_thaw, + .error_handler = nv_swncq_error_handler, + .post_internal_cmd = ata_bmdma_post_internal_cmd, + .data_xfer = ata_data_xfer, + .irq_clear = ata_bmdma_irq_clear, + .irq_on = ata_irq_on, + .scr_read = nv_scr_read, + .scr_write = nv_scr_write, +#ifdef CONFIG_PM + .port_suspend = nv_swncq_port_suspend, + .port_resume = nv_swncq_port_resume, +#endif + .port_start = nv_swncq_port_start, +}; + static const struct ata_port_info nv_port_info[] = { /* generic */ { @@ -490,6 +619,18 @@ static const struct ata_port_info nv_port_info[] = { .port_ops = &nv_adma_ops, .irq_handler = nv_adma_interrupt, }, + /* SWNCQ */ + { + .sht = &nv_swncq_sht, + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | + ATA_FLAG_NCQ, + .link_flags = ATA_LFLAG_HRST_TO_RESUME, + .pio_mask = NV_PIO_MASK, + .mwdma_mask = NV_MWDMA_MASK, + .udma_mask = NV_UDMA_MASK, + .port_ops = &nv_swncq_ops, + .irq_handler = nv_swncq_interrupt, + }, }; MODULE_AUTHOR("NVIDIA"); @@ -499,6 +640,7 @@ MODULE_DEVICE_TABLE(pci, nv_pci_tbl); MODULE_VERSION(DRV_VERSION); static int adma_enabled = 1; +static int swncq_enabled; static void nv_adma_register_mode(struct ata_port *ap) { @@ -1452,6 +1594,34 @@ static void nv_ck804_thaw(struct ata_port *ap) writeb(mask, mmio_base + NV_INT_ENABLE_CK804); } +static void nv_mcp55_freeze(struct ata_port *ap) +{ + void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR]; + int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55; + u32 mask; + + writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55); + + mask = readl(mmio_base + NV_INT_ENABLE_MCP55); + mask &= ~(NV_INT_ALL_MCP55 << shift); + writel(mask, mmio_base + NV_INT_ENABLE_MCP55); + ata_bmdma_freeze(ap); +} + +static void nv_mcp55_thaw(struct ata_port *ap) +{ + void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR]; + int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55; + u32 mask; + + writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55); + + mask = readl(mmio_base + NV_INT_ENABLE_MCP55); + mask |= (NV_INT_MASK_MCP55 << shift); + writel(mask, mmio_base + NV_INT_ENABLE_MCP55); + ata_bmdma_thaw(ap); +} + static int nv_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { @@ -1525,6 +1695,663 @@ static void nv_adma_error_handler(struct ata_port *ap) nv_hardreset, ata_std_postreset); } +static void nv_swncq_qc_to_dq(struct ata_port *ap, struct ata_queued_cmd *qc) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + struct defer_queue *dq = &pp->defer_queue; + + /* queue is full */ + WARN_ON(dq->tail - dq->head == ATA_MAX_QUEUE); + dq->defer_bits |= (1 << qc->tag); + dq->tag[dq->tail++ & (ATA_MAX_QUEUE - 1)] = qc->tag; +} + +static struct ata_queued_cmd *nv_swncq_qc_from_dq(struct ata_port *ap) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + struct defer_queue *dq = &pp->defer_queue; + unsigned int tag; + + if (dq->head == dq->tail) /* null queue */ + return NULL; + + tag = dq->tag[dq->head & (ATA_MAX_QUEUE - 1)]; + dq->tag[dq->head++ & (ATA_MAX_QUEUE - 1)] = ATA_TAG_POISON; + WARN_ON(!(dq->defer_bits & (1 << tag))); + dq->defer_bits &= ~(1 << tag); + + return ata_qc_from_tag(ap, tag); +} + +static void nv_swncq_fis_reinit(struct ata_port *ap) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + + pp->dhfis_bits = 0; + pp->dmafis_bits = 0; + pp->sdbfis_bits = 0; + pp->ncq_flags = 0; +} + +static void nv_swncq_pp_reinit(struct ata_port *ap) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + struct defer_queue *dq = &pp->defer_queue; + + dq->head = 0; + dq->tail = 0; + dq->defer_bits = 0; + pp->qc_active = 0; + pp->last_issue_tag = ATA_TAG_POISON; + nv_swncq_fis_reinit(ap); +} + +static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + + writew(fis, pp->irq_block); +} + +static void __ata_bmdma_stop(struct ata_port *ap) +{ + struct ata_queued_cmd qc; + + qc.ap = ap; + ata_bmdma_stop(&qc); +} + +static void nv_swncq_ncq_stop(struct ata_port *ap) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + unsigned int i; + u32 sactive; + u32 done_mask; + + ata_port_printk(ap, KERN_ERR, + "EH in SWNCQ mode,QC:qc_active 0x%X sactive 0x%X\n", + ap->qc_active, ap->link.sactive); + ata_port_printk(ap, KERN_ERR, + "SWNCQ:qc_active 0x%X defer_bits 0x%X last_issue_tag 0x%x\n " + "dhfis 0x%X dmafis 0x%X sdbfis 0x%X\n", + pp->qc_active, pp->defer_queue.defer_bits, pp->last_issue_tag, + pp->dhfis_bits, pp->dmafis_bits, pp->sdbfis_bits); + + ata_port_printk(ap, KERN_ERR, "ATA_REG 0x%X ERR_REG 0x%X\n", + ap->ops->check_status(ap), + ioread8(ap->ioaddr.error_addr)); + + sactive = readl(pp->sactive_block); + done_mask = pp->qc_active ^ sactive; + + ata_port_printk(ap, KERN_ERR, "tag : dhfis dmafis sdbfis sacitve\n"); + for (i = 0; i < ATA_MAX_QUEUE; i++) { + u8 err = 0; + if (pp->qc_active & (1 << i)) + err = 0; + else if (done_mask & (1 << i)) + err = 1; + else + continue; + + ata_port_printk(ap, KERN_ERR, + "tag 0x%x: %01x %01x %01x %01x %s\n", i, + (pp->dhfis_bits >> i) & 0x1, + (pp->dmafis_bits >> i) & 0x1, + (pp->sdbfis_bits >> i) & 0x1, + (sactive >> i) & 0x1, + (err ? "error! tag doesn't exit" : " ")); + } + + nv_swncq_pp_reinit(ap); + ap->ops->irq_clear(ap); + __ata_bmdma_stop(ap); + nv_swncq_irq_clear(ap, 0xffff); +} + +static void nv_swncq_error_handler(struct ata_port *ap) +{ + struct ata_eh_context *ehc = &ap->link.eh_context; + + if (ap->link.sactive) { + nv_swncq_ncq_stop(ap); + ehc->i.action |= ATA_EH_HARDRESET; + } + + ata_bmdma_drive_eh(ap, ata_std_prereset, ata_std_softreset, + nv_hardreset, ata_std_postreset); +} + +#ifdef CONFIG_PM +static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg) +{ + void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR]; + u32 tmp; + + /* clear irq */ + writel(~0, mmio + NV_INT_STATUS_MCP55); + + /* disable irq */ + writel(0, mmio + NV_INT_ENABLE_MCP55); + + /* disable swncq */ + tmp = readl(mmio + NV_CTL_MCP55); + tmp &= ~(NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ); + writel(tmp, mmio + NV_CTL_MCP55); + + return 0; +} + +static int nv_swncq_port_resume(struct ata_port *ap) +{ + void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR]; + u32 tmp; + + /* clear irq */ + writel(~0, mmio + NV_INT_STATUS_MCP55); + + /* enable irq */ + writel(0x00fd00fd, mmio + NV_INT_ENABLE_MCP55); + + /* enable swncq */ + tmp = readl(mmio + NV_CTL_MCP55); + writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55); + + return 0; +} +#endif + +static void nv_swncq_host_init(struct ata_host *host) +{ + u32 tmp; + void __iomem *mmio = host->iomap[NV_MMIO_BAR]; + struct pci_dev *pdev = to_pci_dev(host->dev); + u8 regval; + + /* disable ECO 398 */ + pci_read_config_byte(pdev, 0x7f, ®val); + regval &= ~(1 << 7); + pci_write_config_byte(pdev, 0x7f, regval); + + /* enable swncq */ + tmp = readl(mmio + NV_CTL_MCP55); + VPRINTK("HOST_CTL:0x%X\n", tmp); + writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55); + + /* enable irq intr */ + tmp = readl(mmio + NV_INT_ENABLE_MCP55); + VPRINTK("HOST_ENABLE:0x%X\n", tmp); + writel(tmp | 0x00fd00fd, mmio + NV_INT_ENABLE_MCP55); + + /* clear port irq */ + writel(~0x0, mmio + NV_INT_STATUS_MCP55); +} + +static int nv_swncq_slave_config(struct scsi_device *sdev) +{ + struct ata_port *ap = ata_shost_to_port(sdev->host); + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + struct ata_device *dev; + int rc; + u8 rev; + u8 check_maxtor = 0; + unsigned char model_num[ATA_ID_PROD_LEN + 1]; + + rc = ata_scsi_slave_config(sdev); + if (sdev->id >= ATA_MAX_DEVICES || sdev->channel || sdev->lun) + /* Not a proper libata device, ignore */ + return rc; + + dev = &ap->link.device[sdev->id]; + if (!(ap->flags & ATA_FLAG_NCQ) || dev->class == ATA_DEV_ATAPI) + return rc; + + /* if MCP51 and Maxtor, then disable ncq */ + if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA || + pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2) + check_maxtor = 1; + + /* if MCP55 and rev <= a2 and Maxtor, then disable ncq */ + if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA || + pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2) { + pci_read_config_byte(pdev, 0x8, &rev); + if (rev <= 0xa2) + check_maxtor = 1; + } + + if (!check_maxtor) + return rc; + + ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num)); + + if (strncmp(model_num, "Maxtor", 6) == 0) { + ata_scsi_change_queue_depth(sdev, 1); + ata_dev_printk(dev, KERN_NOTICE, + "Disabling SWNCQ mode (depth %x)\n", sdev->queue_depth); + } + + return rc; +} + +static int nv_swncq_port_start(struct ata_port *ap) +{ + struct device *dev = ap->host->dev; + void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR]; + struct nv_swncq_port_priv *pp; + int rc; + + rc = ata_port_start(ap); + if (rc) + return rc; + + pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL); + if (!pp) + return -ENOMEM; + + pp->prd = dmam_alloc_coherent(dev, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE, + &pp->prd_dma, GFP_KERNEL); + if (!pp->prd) + return -ENOMEM; + memset(pp->prd, 0, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE); + + ap->private_data = pp; + pp->sactive_block = ap->ioaddr.scr_addr + 4 * SCR_ACTIVE; + pp->irq_block = mmio + NV_INT_STATUS_MCP55 + ap->port_no * 2; + pp->tag_block = mmio + NV_NCQ_REG_MCP55 + ap->port_no * 2; + + return 0; +} + +static void nv_swncq_qc_prep(struct ata_queued_cmd *qc) +{ + if (qc->tf.protocol != ATA_PROT_NCQ) { + ata_qc_prep(qc); + return; + } + + if (!(qc->flags & ATA_QCFLAG_DMAMAP)) + return; + + nv_swncq_fill_sg(qc); +} + +static void nv_swncq_fill_sg(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct scatterlist *sg; + unsigned int idx; + struct nv_swncq_port_priv *pp = ap->private_data; + struct ata_prd *prd; + + WARN_ON(qc->__sg == NULL); + WARN_ON(qc->n_elem == 0 && qc->pad_len == 0); + + prd = pp->prd + ATA_MAX_PRD * qc->tag; + + idx = 0; + ata_for_each_sg(sg, qc) { + u32 addr, offset; + u32 sg_len, len; + + addr = (u32)sg_dma_address(sg); + sg_len = sg_dma_len(sg); + + while (sg_len) { + offset = addr & 0xffff; + len = sg_len; + if ((offset + sg_len) > 0x10000) + len = 0x10000 - offset; + + prd[idx].addr = cpu_to_le32(addr); + prd[idx].flags_len = cpu_to_le32(len & 0xffff); + + idx++; + sg_len -= len; + addr += len; + } + } + + if (idx) + prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT); +} + +static unsigned int nv_swncq_issue_atacmd(struct ata_port *ap, + struct ata_queued_cmd *qc) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + + if (qc == NULL) + return 0; + + DPRINTK("Enter\n"); + + writel((1 << qc->tag), pp->sactive_block); + pp->last_issue_tag = qc->tag; + pp->dhfis_bits &= ~(1 << qc->tag); + pp->dmafis_bits &= ~(1 << qc->tag); + pp->qc_active |= (0x1 << qc->tag); + + ap->ops->tf_load(ap, &qc->tf); /* load tf registers */ + ap->ops->exec_command(ap, &qc->tf); + + DPRINTK("Issued tag %u\n", qc->tag); + + return 0; +} + +static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct nv_swncq_port_priv *pp = ap->private_data; + + if (qc->tf.protocol != ATA_PROT_NCQ) + return ata_qc_issue_prot(qc); + + DPRINTK("Enter\n"); + + if (!pp->qc_active) + nv_swncq_issue_atacmd(ap, qc); + else + nv_swncq_qc_to_dq(ap, qc); /* add qc to defer queue */ + + return 0; +} + +static void nv_swncq_hotplug(struct ata_port *ap, u32 fis) +{ + u32 serror; + struct ata_eh_info *ehi = &ap->link.eh_info; + + ata_ehi_clear_desc(ehi); + + /* AHCI needs SError cleared; otherwise, it might lock up */ + sata_scr_read(&ap->link, SCR_ERROR, &serror); + sata_scr_write(&ap->link, SCR_ERROR, serror); + + /* analyze @irq_stat */ + if (fis & NV_SWNCQ_IRQ_ADDED) + ata_ehi_push_desc(ehi, "hot plug"); + else if (fis & NV_SWNCQ_IRQ_REMOVED) + ata_ehi_push_desc(ehi, "hot unplug"); + + ata_ehi_hotplugged(ehi); + + /* okay, let's hand over to EH */ + ehi->serror |= serror; + + ata_port_freeze(ap); +} + +static int nv_swncq_sdbfis(struct ata_port *ap) +{ + struct ata_queued_cmd *qc; + struct nv_swncq_port_priv *pp = ap->private_data; + struct ata_eh_info *ehi = &ap->link.eh_info; + u32 sactive; + int nr_done = 0; + u32 done_mask; + int i; + u8 host_stat; + u8 lack_dhfis = 0; + + host_stat = ap->ops->bmdma_status(ap); + if (unlikely(host_stat & ATA_DMA_ERR)) { + /* error when transfering data to/from memory */ + ata_ehi_clear_desc(ehi); + ata_ehi_push_desc(ehi, "BMDMA stat 0x%x", host_stat); + ehi->err_mask |= AC_ERR_HOST_BUS; + ehi->action |= ATA_EH_SOFTRESET; + return -EINVAL; + } + + ap->ops->irq_clear(ap); + __ata_bmdma_stop(ap); + + sactive = readl(pp->sactive_block); + done_mask = pp->qc_active ^ sactive; + + if (unlikely(done_mask & sactive)) { + ata_ehi_clear_desc(ehi); + ata_ehi_push_desc(ehi, "illegal SWNCQ:qc_active transition" + "(%08x->%08x)", pp->qc_active, sactive); + ehi->err_mask |= AC_ERR_HSM; + ehi->action |= ATA_EH_HARDRESET; + return -EINVAL; + } + for (i = 0; i < ATA_MAX_QUEUE; i++) { + if (!(done_mask & (1 << i))) + continue; + + qc = ata_qc_from_tag(ap, i); + if (qc) { + ata_qc_complete(qc); + pp->qc_active &= ~(1 << i); + pp->dhfis_bits &= ~(1 << i); + pp->dmafis_bits &= ~(1 << i); + pp->sdbfis_bits |= (1 << i); + nr_done++; + } + } + + if (!ap->qc_active) { + DPRINTK("over\n"); + nv_swncq_pp_reinit(ap); + return nr_done; + } + + if (pp->qc_active & pp->dhfis_bits) + return nr_done; + + if ((pp->ncq_flags & ncq_saw_backout) || + (pp->qc_active ^ pp->dhfis_bits)) + /* if the controller cann't get a device to host register FIS, + * The driver needs to reissue the new command. + */ + lack_dhfis = 1; + + DPRINTK("id 0x%x QC: qc_active 0x%x," + "SWNCQ:qc_active 0x%X defer_bits %X " + "dhfis 0x%X dmafis 0x%X last_issue_tag %x\n", + ap->print_id, ap->qc_active, pp->qc_active, + pp->defer_queue.defer_bits, pp->dhfis_bits, + pp->dmafis_bits, pp->last_issue_tag); + + nv_swncq_fis_reinit(ap); + + if (lack_dhfis) { + qc = ata_qc_from_tag(ap, pp->last_issue_tag); + nv_swncq_issue_atacmd(ap, qc); + return nr_done; + } + + if (pp->defer_queue.defer_bits) { + /* send deferral queue command */ + qc = nv_swncq_qc_from_dq(ap); + WARN_ON(qc == NULL); + nv_swncq_issue_atacmd(ap, qc); + } + + return nr_done; +} + +static inline u32 nv_swncq_tag(struct ata_port *ap) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + u32 tag; + + tag = readb(pp->tag_block) >> 2; + return (tag & 0x1f); +} + +static int nv_swncq_dmafis(struct ata_port *ap) +{ + struct ata_queued_cmd *qc; + unsigned int rw; + u8 dmactl; + u32 tag; + struct nv_swncq_port_priv *pp = ap->private_data; + + __ata_bmdma_stop(ap); + tag = nv_swncq_tag(ap); + + DPRINTK("dma setup tag 0x%x\n", tag); + qc = ata_qc_from_tag(ap, tag); + + if (unlikely(!qc)) + return 0; + + rw = qc->tf.flags & ATA_TFLAG_WRITE; + + /* load PRD table addr. */ + iowrite32(pp->prd_dma + ATA_PRD_TBL_SZ * qc->tag, + ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS); + + /* specify data direction, triple-check start bit is clear */ + dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD); + dmactl &= ~ATA_DMA_WR; + if (!rw) + dmactl |= ATA_DMA_WR; + + iowrite8(dmactl | ATA_DMA_START, ap->ioaddr.bmdma_addr + ATA_DMA_CMD); + + return 1; +} + +static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis) +{ + struct nv_swncq_port_priv *pp = ap->private_data; + struct ata_queued_cmd *qc; + struct ata_eh_info *ehi = &ap->link.eh_info; + u32 serror; + u8 ata_stat; + int rc = 0; + + ata_stat = ap->ops->check_status(ap); + nv_swncq_irq_clear(ap, fis); + if (!fis) + return; + + if (ap->pflags & ATA_PFLAG_FROZEN) + return; + + if (fis & NV_SWNCQ_IRQ_HOTPLUG) { + nv_swncq_hotplug(ap, fis); + return; + } + + if (!pp->qc_active) + return; + + if (ap->ops->scr_read(ap, SCR_ERROR, &serror)) + return; + ap->ops->scr_write(ap, SCR_ERROR, serror); + + if (ata_stat & ATA_ERR) { + ata_ehi_clear_desc(ehi); + ata_ehi_push_desc(ehi, "Ata error. fis:0x%X", fis); + ehi->err_mask |= AC_ERR_DEV; + ehi->serror |= serror; + ehi->action |= ATA_EH_SOFTRESET; + ata_port_freeze(ap); + return; + } + + if (fis & NV_SWNCQ_IRQ_BACKOUT) { + /* If the IRQ is backout, driver must issue + * the new command again some time later. + */ + pp->ncq_flags |= ncq_saw_backout; + } + + if (fis & NV_SWNCQ_IRQ_SDBFIS) { + pp->ncq_flags |= ncq_saw_sdb; + DPRINTK("id 0x%x SWNCQ: qc_active 0x%X " + "dhfis 0x%X dmafis 0x%X sactive 0x%X\n", + ap->print_id, pp->qc_active, pp->dhfis_bits, + pp->dmafis_bits, readl(pp->sactive_block)); + rc = nv_swncq_sdbfis(ap); + if (rc < 0) + goto irq_error; + } + + if (fis & NV_SWNCQ_IRQ_DHREGFIS) { + /* The interrupt indicates the new command + * was transmitted correctly to the drive. + */ + pp->dhfis_bits |= (0x1 << pp->last_issue_tag); + pp->ncq_flags |= ncq_saw_d2h; + if (pp->ncq_flags & (ncq_saw_sdb | ncq_saw_backout)) { + ata_ehi_push_desc(ehi, "illegal fis transaction"); + ehi->err_mask |= AC_ERR_HSM; + ehi->action |= ATA_EH_HARDRESET; + goto irq_error; + } + + if (!(fis & NV_SWNCQ_IRQ_DMASETUP) && + !(pp->ncq_flags & ncq_saw_dmas)) { + ata_stat = ap->ops->check_status(ap); + if (ata_stat & ATA_BUSY) + goto irq_exit; + + if (pp->defer_queue.defer_bits) { + DPRINTK("send next command\n"); + qc = nv_swncq_qc_from_dq(ap); + nv_swncq_issue_atacmd(ap, qc); + } + } + } + + if (fis & NV_SWNCQ_IRQ_DMASETUP) { + /* program the dma controller with appropriate PRD buffers + * and start the DMA transfer for requested command. + */ + pp->dmafis_bits |= (0x1 << nv_swncq_tag(ap)); + pp->ncq_flags |= ncq_saw_dmas; + rc = nv_swncq_dmafis(ap); + } + +irq_exit: + return; +irq_error: + ata_ehi_push_desc(ehi, "fis:0x%x", fis); + ata_port_freeze(ap); + return; +} + +static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance) +{ + struct ata_host *host = dev_instance; + unsigned int i; + unsigned int handled = 0; + unsigned long flags; + u32 irq_stat; + + spin_lock_irqsave(&host->lock, flags); + + irq_stat = readl(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_MCP55); + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + if (ap && !(ap->flags & ATA_FLAG_DISABLED)) { + if (ap->link.sactive) { + nv_swncq_host_interrupt(ap, (u16)irq_stat); + handled = 1; + } else { + if (irq_stat) /* reserve Hotplug */ + nv_swncq_irq_clear(ap, 0xfff0); + + handled += nv_host_intr(ap, (u8)irq_stat); + } + } + irq_stat >>= NV_INT_PORT_SHIFT_MCP55; + } + + spin_unlock_irqrestore(&host->lock, flags); + + return IRQ_RETVAL(handled); +} + static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version = 0; @@ -1551,7 +2378,7 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) return rc; /* determine type and allocate host */ - if (type >= CK804 && adma_enabled) { + if (type == CK804 && adma_enabled) { dev_printk(KERN_NOTICE, &pdev->dev, "Using ADMA mode\n"); type = ADMA; } @@ -1597,6 +2424,9 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) rc = nv_adma_host_init(host); if (rc) return rc; + } else if (type == SWNCQ && swncq_enabled) { + dev_printk(KERN_NOTICE, &pdev->dev, "Using SWNCQ mode\n"); + nv_swncq_host_init(host); } pci_set_master(pdev); @@ -1696,3 +2526,6 @@ module_init(nv_init); module_exit(nv_exit); module_param_named(adma, adma_enabled, bool, 0444); MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: true)"); +module_param_named(swncq, swncq_enabled, bool, 0444); +MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: false)"); + diff --git a/drivers/char/ec3104_keyb.c b/drivers/char/ec3104_keyb.c deleted file mode 100644 index 020011495d91..000000000000 --- a/drivers/char/ec3104_keyb.c +++ /dev/null @@ -1,457 +0,0 @@ -/* - * linux/drivers/char/ec3104_keyb.c - * - * Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org> - * - * based on linux/drivers/char/pc_keyb.c, which had the following comments: - * - * Separation of the PC low-level part by Geert Uytterhoeven, May 1997 - * See keyboard.c for the whole history. - * - * Major cleanup by Martin Mares, May 1997 - * - * Combined the keyboard and PS/2 mouse handling into one file, - * because they share the same hardware. - * Johan Myreen <jem@iki.fi> 1998-10-08. - * - * Code fixes to handle mouse ACKs properly. - * C. Scott Ananian <cananian@alumni.princeton.edu> 1999-01-29. - */ -/* EC3104 note: - * This code was written without any documentation about the EC3104 chip. While - * I hope I got most of the basic functionality right, the register names I use - * are most likely completely different from those in the chip documentation. - * - * If you have any further information about the EC3104, please tell me - * (prumpf@tux.org). - */ - - -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/tty.h> -#include <linux/mm.h> -#include <linux/signal.h> -#include <linux/init.h> -#include <linux/kbd_ll.h> -#include <linux/delay.h> -#include <linux/random.h> -#include <linux/poll.h> -#include <linux/miscdevice.h> -#include <linux/slab.h> -#include <linux/kbd_kern.h> -#include <linux/bitops.h> - -#include <asm/keyboard.h> -#include <asm/uaccess.h> -#include <asm/irq.h> -#include <asm/system.h> -#include <asm/ec3104.h> - -#include <asm/io.h> - -/* Some configuration switches are present in the include file... */ - -#include <linux/pc_keyb.h> - -#define MSR_CTS 0x10 -#define MCR_RTS 0x02 -#define LSR_DR 0x01 -#define LSR_BOTH_EMPTY 0x60 - -static struct e5_struct { - u8 packet[8]; - int pos; - int length; - - u8 cached_mcr; - u8 last_msr; -} ec3104_keyb; - -/* Simple translation table for the SysRq keys */ - - -#ifdef CONFIG_MAGIC_SYSRQ -unsigned char ec3104_kbd_sysrq_xlate[128] = - "\000\0331234567890-=\177\t" /* 0x00 - 0x0f */ - "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */ - "dfghjkl;'`\000\\zxcv" /* 0x20 - 0x2f */ - "bnm,./\000*\000 \000\201\202\203\204\205" /* 0x30 - 0x3f */ - "\206\207\210\211\212\000\000789-456+1" /* 0x40 - 0x4f */ - "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */ - "\r\000/"; /* 0x60 - 0x6f */ -#endif - -static void kbd_write_command_w(int data); -static void kbd_write_output_w(int data); -#ifdef CONFIG_PSMOUSE -static void aux_write_ack(int val); -static void __aux_write_ack(int val); -#endif - -static DEFINE_SPINLOCK(kbd_controller_lock); -static unsigned char handle_kbd_event(void); - -/* used only by send_data - set by keyboard_interrupt */ -static volatile unsigned char reply_expected; -static volatile unsigned char acknowledge; -static volatile unsigned char resend; - - -int ec3104_kbd_setkeycode(unsigned int scancode, unsigned int keycode) -{ - return 0; -} - -int ec3104_kbd_getkeycode(unsigned int scancode) -{ - return 0; -} - - -/* yes, it probably would be faster to use an array. I don't care. */ - -static inline unsigned char ec3104_scan2key(unsigned char scancode) -{ - switch (scancode) { - case 1: /* '`' */ - return 41; - - case 2 ... 27: - return scancode; - - case 28: /* '\\' */ - return 43; - - case 29 ... 39: - return scancode + 1; - - case 40: /* '\r' */ - return 28; - - case 41 ... 50: - return scancode + 3; - - case 51: /* ' ' */ - return 57; - - case 52: /* escape */ - return 1; - - case 54: /* insert/delete (labelled delete) */ - /* this should arguably be 110, but I'd like to have ctrl-alt-del - * working with a standard keymap */ - return 111; - - case 55: /* left */ - return 105; - case 56: /* home */ - return 102; - case 57: /* end */ - return 107; - case 58: /* up */ - return 103; - case 59: /* down */ - return 108; - case 60: /* pgup */ - return 104; - case 61: /* pgdown */ - return 109; - case 62: /* right */ - return 106; - - case 79 ... 88: /* f1 - f10 */ - return scancode - 20; - - case 89 ... 90: /* f11 - f12 */ - return scancode - 2; - - case 91: /* left shift */ - return 42; - - case 92: /* right shift */ - return 54; - - case 93: /* left alt */ - return 56; - case 94: /* right alt */ - return 100; - case 95: /* left ctrl */ - return 29; - case 96: /* right ctrl */ - return 97; - - case 97: /* caps lock */ - return 58; - case 102: /* left windows */ - return 125; - case 103: /* right windows */ - return 126; - - case 106: /* Fn */ - /* this is wrong. */ - return 84; - - default: - return 0; - } -} - -int ec3104_kbd_translate(unsigned char scancode, unsigned char *keycode, - char raw_mode) -{ - scancode &= 0x7f; - - *keycode = ec3104_scan2key(scancode); - - return 1; -} - -char ec3104_kbd_unexpected_up(unsigned char keycode) -{ - return 0200; -} - -static inline void handle_keyboard_event(unsigned char scancode) -{ -#ifdef CONFIG_VT - handle_scancode(scancode, !(scancode & 0x80)); -#endif - tasklet_schedule(&keyboard_tasklet); -} - -void ec3104_kbd_leds(unsigned char leds) -{ -} - -static u8 e5_checksum(u8 *packet, int count) -{ - int i; - u8 sum = 0; - - for (i=0; i<count; i++) - sum ^= packet[i]; - - if (sum & 0x80) - sum ^= 0xc0; - - return sum; -} - -static void e5_wait_for_cts(struct e5_struct *k) -{ - u8 msr; - - do { - msr = ctrl_inb(EC3104_SER4_MSR); - } while (!(msr & MSR_CTS)); -} - - -static void e5_send_byte(u8 byte, struct e5_struct *k) -{ - u8 status; - - do { - status = ctrl_inb(EC3104_SER4_LSR); - } while ((status & LSR_BOTH_EMPTY) != LSR_BOTH_EMPTY); - - printk("<%02x>", byte); - - ctrl_outb(byte, EC3104_SER4_DATA); - - do { - status = ctrl_inb(EC3104_SER4_LSR); - } while ((status & LSR_BOTH_EMPTY) != LSR_BOTH_EMPTY); - -} - -static int e5_send_packet(u8 *packet, int count, struct e5_struct *k) -{ - int i; - - disable_irq(EC3104_IRQ_SER4); - - if (k->cached_mcr & MCR_RTS) { - printk("e5_send_packet: too slow\n"); - enable_irq(EC3104_IRQ_SER4); - return -EAGAIN; - } - - k->cached_mcr |= MCR_RTS; - ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); - - e5_wait_for_cts(k); - - printk("p: "); - - for(i=0; i<count; i++) - e5_send_byte(packet[i], k); - - e5_send_byte(e5_checksum(packet, count), k); - - printk("\n"); - - udelay(1500); - - k->cached_mcr &= ~MCR_RTS; - ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); - - set_current_state(TASK_UNINTERRUPTIBLE); - - - - enable_irq(EC3104_IRQ_SER4); - - - - return 0; -} - -/* - * E5 packets we know about: - * E5->host 0x80 0x05 <checksum> - resend packet - * host->E5 0x83 0x43 <contrast> - set LCD contrast - * host->E5 0x85 0x41 0x02 <brightness> 0x02 - set LCD backlight - * E5->host 0x87 <ps2 packet> 0x00 <checksum> - external PS2 - * E5->host 0x88 <scancode> <checksum> - key press - */ - -static void e5_receive(struct e5_struct *k) -{ - k->packet[k->pos++] = ctrl_inb(EC3104_SER4_DATA); - - if (k->pos == 1) { - switch(k->packet[0]) { - case 0x80: - k->length = 3; - break; - - case 0x87: /* PS2 ext */ - k->length = 6; - break; - - case 0x88: /* keyboard */ - k->length = 3; - break; - - default: - k->length = 1; - printk(KERN_WARNING "unknown E5 packet %02x\n", - k->packet[0]); - } - } - - if (k->pos == k->length) { - int i; - - if (e5_checksum(k->packet, k->length) != 0) - printk(KERN_WARNING "E5: wrong checksum\n"); - -#if 0 - printk("E5 packet ["); - for(i=0; i<k->length; i++) { - printk("%02x ", k->packet[i]); - } - - printk("(%02x)]\n", e5_checksum(k->packet, k->length-1)); -#endif - - switch(k->packet[0]) { - case 0x80: - case 0x88: - handle_keyboard_event(k->packet[1]); - break; - } - - k->pos = k->length = 0; - } -} - -static void ec3104_keyb_interrupt(int irq, void *data) -{ - struct e5_struct *k = &ec3104_keyb; - u8 msr, lsr; - - msr = ctrl_inb(EC3104_SER4_MSR); - - if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) { - if (k->cached_mcr & MCR_RTS) - printk("confused: RTS already high\n"); - /* CTS went high. Send RTS. */ - k->cached_mcr |= MCR_RTS; - - ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); - } else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) { - /* CTS went low. */ - if (!(k->cached_mcr & MCR_RTS)) - printk("confused: RTS already low\n"); - - k->cached_mcr &= ~MCR_RTS; - - ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); - } - - k->last_msr = msr; - - lsr = ctrl_inb(EC3104_SER4_LSR); - - if (lsr & LSR_DR) - e5_receive(k); -} - -static void ec3104_keyb_clear_state(void) -{ - struct e5_struct *k = &ec3104_keyb; - u8 msr, lsr; - - /* we want CTS to be low */ - k->last_msr = 0; - - for (;;) { - msleep(100); - - msr = ctrl_inb(EC3104_SER4_MSR); - - lsr = ctrl_inb(EC3104_SER4_LSR); - - if (lsr & LSR_DR) { - e5_receive(k); - continue; - } - - if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) { - if (k->cached_mcr & MCR_RTS) - printk("confused: RTS already high\n"); - /* CTS went high. Send RTS. */ - k->cached_mcr |= MCR_RTS; - - ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); - } else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) { - /* CTS went low. */ - if (!(k->cached_mcr & MCR_RTS)) - printk("confused: RTS already low\n"); - - k->cached_mcr &= ~MCR_RTS; - - ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); - } else - break; - - k->last_msr = msr; - - continue; - } -} - -void __init ec3104_kbd_init_hw(void) -{ - ec3104_keyb.last_msr = ctrl_inb(EC3104_SER4_MSR); - ec3104_keyb.cached_mcr = ctrl_inb(EC3104_SER4_MCR); - - ec3104_keyb_clear_state(); - - /* Ok, finally allocate the IRQ, and off we go.. */ - request_irq(EC3104_IRQ_SER4, ec3104_keyb_interrupt, 0, "keyboard", NULL); -} diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index e47f88170806..700a1657554f 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -158,6 +158,7 @@ config SENSORS_K8TEMP config SENSORS_AMS tristate "Apple Motion Sensor driver" depends on PPC_PMAC && !PPC64 && INPUT && ((ADB_PMU && I2C = y) || (ADB_PMU && !I2C) || I2C) && EXPERIMENTAL + select INPUT_POLLDEV help Support for the motion sensor included in PowerBooks. Includes implementations for PMU and I2C. @@ -701,6 +702,7 @@ config SENSORS_W83627EHF config SENSORS_HDAPS tristate "IBM Hard Drive Active Protection System (hdaps)" depends on INPUT && X86 + select INPUT_POLLDEV default n help This driver provides support for the IBM Hard Drive Active Protection @@ -722,6 +724,7 @@ config SENSORS_APPLESMC depends on INPUT && X86 select NEW_LEDS select LEDS_CLASS + select INPUT_POLLDEV default n help This driver provides support for the Apple System Management diff --git a/drivers/hwmon/ams/ams-input.c b/drivers/hwmon/ams/ams-input.c index ca7095d96ad0..7b81e0c2c2d9 100644 --- a/drivers/hwmon/ams/ams-input.c +++ b/drivers/hwmon/ams/ams-input.c @@ -27,47 +27,32 @@ static unsigned int invert; module_param(invert, bool, 0644); MODULE_PARM_DESC(invert, "Invert input data on X and Y axis"); -static int ams_input_kthread(void *data) +static void ams_idev_poll(struct input_polled_dev *dev) { + struct input_dev *idev = dev->input; s8 x, y, z; - while (!kthread_should_stop()) { - mutex_lock(&ams_info.lock); - - ams_sensors(&x, &y, &z); - - x -= ams_info.xcalib; - y -= ams_info.ycalib; - z -= ams_info.zcalib; - - input_report_abs(ams_info.idev, ABS_X, invert ? -x : x); - input_report_abs(ams_info.idev, ABS_Y, invert ? -y : y); - input_report_abs(ams_info.idev, ABS_Z, z); + mutex_lock(&ams_info.lock); - input_sync(ams_info.idev); + ams_sensors(&x, &y, &z); - mutex_unlock(&ams_info.lock); + x -= ams_info.xcalib; + y -= ams_info.ycalib; + z -= ams_info.zcalib; - msleep(25); - } + input_report_abs(idev, ABS_X, invert ? -x : x); + input_report_abs(idev, ABS_Y, invert ? -y : y); + input_report_abs(idev, ABS_Z, z); - return 0; -} + input_sync(idev); -static int ams_input_open(struct input_dev *dev) -{ - ams_info.kthread = kthread_run(ams_input_kthread, NULL, "kams"); - return IS_ERR(ams_info.kthread) ? PTR_ERR(ams_info.kthread) : 0; -} - -static void ams_input_close(struct input_dev *dev) -{ - kthread_stop(ams_info.kthread); + mutex_unlock(&ams_info.lock); } /* Call with ams_info.lock held! */ static void ams_input_enable(void) { + struct input_dev *input; s8 x, y, z; if (ams_info.idev) @@ -78,27 +63,29 @@ static void ams_input_enable(void) ams_info.ycalib = y; ams_info.zcalib = z; - ams_info.idev = input_allocate_device(); + ams_info.idev = input_allocate_polled_device(); if (!ams_info.idev) return; - ams_info.idev->name = "Apple Motion Sensor"; - ams_info.idev->id.bustype = ams_info.bustype; - ams_info.idev->id.vendor = 0; - ams_info.idev->open = ams_input_open; - ams_info.idev->close = ams_input_close; - ams_info.idev->dev.parent = &ams_info.of_dev->dev; + ams_info.idev->poll = ams_idev_poll; + ams_info.idev->poll_interval = 25; + + input = ams_info.idev->input; + input->name = "Apple Motion Sensor"; + input->id.bustype = ams_info.bustype; + input->id.vendor = 0; + input->dev.parent = &ams_info.of_dev->dev; - input_set_abs_params(ams_info.idev, ABS_X, -50, 50, 3, 0); - input_set_abs_params(ams_info.idev, ABS_Y, -50, 50, 3, 0); - input_set_abs_params(ams_info.idev, ABS_Z, -50, 50, 3, 0); + input_set_abs_params(input, ABS_X, -50, 50, 3, 0); + input_set_abs_params(input, ABS_Y, -50, 50, 3, 0); + input_set_abs_params(input, ABS_Z, -50, 50, 3, 0); - set_bit(EV_ABS, ams_info.idev->evbit); - set_bit(EV_KEY, ams_info.idev->evbit); - set_bit(BTN_TOUCH, ams_info.idev->keybit); + set_bit(EV_ABS, input->evbit); + set_bit(EV_KEY, input->evbit); + set_bit(BTN_TOUCH, input->keybit); - if (input_register_device(ams_info.idev)) { - input_free_device(ams_info.idev); + if (input_register_polled_device(ams_info.idev)) { + input_free_polled_device(ams_info.idev); ams_info.idev = NULL; return; } @@ -108,7 +95,8 @@ static void ams_input_enable(void) static void ams_input_disable(void) { if (ams_info.idev) { - input_unregister_device(ams_info.idev); + input_unregister_polled_device(ams_info.idev); + input_free_polled_device(ams_info.idev); ams_info.idev = NULL; } } diff --git a/drivers/hwmon/ams/ams.h b/drivers/hwmon/ams/ams.h index 240730e6bcde..a6221e5dd984 100644 --- a/drivers/hwmon/ams/ams.h +++ b/drivers/hwmon/ams/ams.h @@ -1,5 +1,5 @@ #include <linux/i2c.h> -#include <linux/input.h> +#include <linux/input-polldev.h> #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/spinlock.h> @@ -52,8 +52,7 @@ struct ams { #endif /* Joystick emulation */ - struct task_struct *kthread; - struct input_dev *idev; + struct input_polled_dev *idev; __u16 bustype; /* calibrated null values */ diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index f37fd7ebf65a..4879125b4cdc 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -28,7 +28,7 @@ #include <linux/delay.h> #include <linux/platform_device.h> -#include <linux/input.h> +#include <linux/input-polldev.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/timer.h> @@ -59,9 +59,9 @@ #define LIGHT_SENSOR_LEFT_KEY "ALV0" /* r-o {alv (6 bytes) */ #define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6 bytes) */ -#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */ +#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */ -#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */ +#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */ #define MOTION_SENSOR_X_KEY "MO_X" /* r-o sp78 (2 bytes) */ #define MOTION_SENSOR_Y_KEY "MO_Y" /* r-o sp78 (2 bytes) */ @@ -103,7 +103,7 @@ static const char* fan_speed_keys[] = { #define INIT_TIMEOUT_MSECS 5000 /* wait up to 5s for device init ... */ #define INIT_WAIT_MSECS 50 /* ... in 50ms increments */ -#define APPLESMC_POLL_PERIOD (HZ/20) /* poll for input every 1/20s */ +#define APPLESMC_POLL_INTERVAL 50 /* msecs */ #define APPLESMC_INPUT_FUZZ 4 /* input event threshold */ #define APPLESMC_INPUT_FLAT 4 @@ -125,9 +125,8 @@ static const int debug; static struct platform_device *pdev; static s16 rest_x; static s16 rest_y; -static struct timer_list applesmc_timer; -static struct input_dev *applesmc_idev; static struct device *hwmon_dev; +static struct input_polled_dev *applesmc_idev; /* Indicates whether this computer has an accelerometer. */ static unsigned int applesmc_accelerometer; @@ -138,7 +137,7 @@ static unsigned int applesmc_light; /* Indicates which temperature sensors set to use. */ static unsigned int applesmc_temperature_set; -static struct mutex applesmc_lock; +static DEFINE_MUTEX(applesmc_lock); /* * Last index written to key_at_index sysfs file, and value to use for all other @@ -455,27 +454,12 @@ static void applesmc_calibrate(void) rest_x = -rest_x; } -static int applesmc_idev_open(struct input_dev *dev) -{ - add_timer(&applesmc_timer); - - return 0; -} - -static void applesmc_idev_close(struct input_dev *dev) -{ - del_timer_sync(&applesmc_timer); -} - -static void applesmc_idev_poll(unsigned long unused) +static void applesmc_idev_poll(struct input_polled_dev *dev) { + struct input_dev *idev = dev->input; s16 x, y; - /* Cannot sleep. Try nonblockingly. If we fail, try again later. */ - if (!mutex_trylock(&applesmc_lock)) { - mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD); - return; - } + mutex_lock(&applesmc_lock); if (applesmc_read_motion_sensor(SENSOR_X, &x)) goto out; @@ -483,13 +467,11 @@ static void applesmc_idev_poll(unsigned long unused) goto out; x = -x; - input_report_abs(applesmc_idev, ABS_X, x - rest_x); - input_report_abs(applesmc_idev, ABS_Y, y - rest_y); - input_sync(applesmc_idev); + input_report_abs(idev, ABS_X, x - rest_x); + input_report_abs(idev, ABS_Y, y - rest_y); + input_sync(idev); out: - mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD); - mutex_unlock(&applesmc_lock); } @@ -821,8 +803,7 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev, if (!ret) { return info[0]; - } - else { + } else { return ret; } } @@ -1093,6 +1074,7 @@ static int applesmc_dmi_match(const struct dmi_system_id *id) /* Create accelerometer ressources */ static int applesmc_create_accelerometer(void) { + struct input_dev *idev; int ret; ret = sysfs_create_group(&pdev->dev.kobj, @@ -1100,40 +1082,37 @@ static int applesmc_create_accelerometer(void) if (ret) goto out; - applesmc_idev = input_allocate_device(); + applesmc_idev = input_allocate_polled_device(); if (!applesmc_idev) { ret = -ENOMEM; goto out_sysfs; } + applesmc_idev->poll = applesmc_idev_poll; + applesmc_idev->poll_interval = APPLESMC_POLL_INTERVAL; + /* initial calibrate for the input device */ applesmc_calibrate(); - /* initialize the input class */ - applesmc_idev->name = "applesmc"; - applesmc_idev->id.bustype = BUS_HOST; - applesmc_idev->dev.parent = &pdev->dev; - applesmc_idev->evbit[0] = BIT(EV_ABS); - applesmc_idev->open = applesmc_idev_open; - applesmc_idev->close = applesmc_idev_close; - input_set_abs_params(applesmc_idev, ABS_X, + /* initialize the input device */ + idev = applesmc_idev->input; + idev->name = "applesmc"; + idev->id.bustype = BUS_HOST; + idev->dev.parent = &pdev->dev; + idev->evbit[0] = BIT(EV_ABS); + input_set_abs_params(idev, ABS_X, -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); - input_set_abs_params(applesmc_idev, ABS_Y, + input_set_abs_params(idev, ABS_Y, -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); - ret = input_register_device(applesmc_idev); + ret = input_register_polled_device(applesmc_idev); if (ret) goto out_idev; - /* start up our timer for the input device */ - init_timer(&applesmc_timer); - applesmc_timer.function = applesmc_idev_poll; - applesmc_timer.expires = jiffies + APPLESMC_POLL_PERIOD; - return 0; out_idev: - input_free_device(applesmc_idev); + input_free_polled_device(applesmc_idev); out_sysfs: sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group); @@ -1146,8 +1125,8 @@ out: /* Release all ressources used by the accelerometer */ static void applesmc_release_accelerometer(void) { - del_timer_sync(&applesmc_timer); - input_unregister_device(applesmc_idev); + input_unregister_polled_device(applesmc_idev); + input_free_polled_device(applesmc_idev); sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group); } @@ -1184,8 +1163,6 @@ static int __init applesmc_init(void) int count; int i; - mutex_init(&applesmc_lock); - if (!dmi_check_system(applesmc_whitelist)) { printk(KERN_WARNING "applesmc: supported laptop not found!\n"); ret = -ENODEV; diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c index a7c6d407572b..8a7ae03aeee4 100644 --- a/drivers/hwmon/hdaps.c +++ b/drivers/hwmon/hdaps.c @@ -28,7 +28,7 @@ #include <linux/delay.h> #include <linux/platform_device.h> -#include <linux/input.h> +#include <linux/input-polldev.h> #include <linux/kernel.h> #include <linux/mutex.h> #include <linux/module.h> @@ -61,13 +61,12 @@ #define INIT_TIMEOUT_MSECS 4000 /* wait up to 4s for device init ... */ #define INIT_WAIT_MSECS 200 /* ... in 200ms increments */ -#define HDAPS_POLL_PERIOD (HZ/20) /* poll for input every 1/20s */ +#define HDAPS_POLL_INTERVAL 50 /* poll for input every 1/20s (50 ms)*/ #define HDAPS_INPUT_FUZZ 4 /* input event threshold */ #define HDAPS_INPUT_FLAT 4 -static struct timer_list hdaps_timer; static struct platform_device *pdev; -static struct input_dev *hdaps_idev; +static struct input_polled_dev *hdaps_idev; static unsigned int hdaps_invert; static u8 km_activity; static int rest_x; @@ -323,24 +322,19 @@ static void hdaps_calibrate(void) __hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y); } -static void hdaps_mousedev_poll(unsigned long unused) +static void hdaps_mousedev_poll(struct input_polled_dev *dev) { + struct input_dev *input_dev = dev->input; int x, y; - /* Cannot sleep. Try nonblockingly. If we fail, try again later. */ - if (mutex_trylock(&hdaps_mtx)) { - mod_timer(&hdaps_timer,jiffies + HDAPS_POLL_PERIOD); - return; - } + mutex_lock(&hdaps_mtx); if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y)) goto out; - input_report_abs(hdaps_idev, ABS_X, x - rest_x); - input_report_abs(hdaps_idev, ABS_Y, y - rest_y); - input_sync(hdaps_idev); - - mod_timer(&hdaps_timer, jiffies + HDAPS_POLL_PERIOD); + input_report_abs(input_dev, ABS_X, x - rest_x); + input_report_abs(input_dev, ABS_Y, y - rest_y); + input_sync(input_dev); out: mutex_unlock(&hdaps_mtx); @@ -536,6 +530,7 @@ static struct dmi_system_id __initdata hdaps_whitelist[] = { static int __init hdaps_init(void) { + struct input_dev *idev; int ret; if (!dmi_check_system(hdaps_whitelist)) { @@ -563,39 +558,37 @@ static int __init hdaps_init(void) if (ret) goto out_device; - hdaps_idev = input_allocate_device(); + hdaps_idev = input_allocate_polled_device(); if (!hdaps_idev) { ret = -ENOMEM; goto out_group; } + hdaps_idev->poll = hdaps_mousedev_poll; + hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL; + /* initial calibrate for the input device */ hdaps_calibrate(); /* initialize the input class */ - hdaps_idev->name = "hdaps"; - hdaps_idev->dev.parent = &pdev->dev; - hdaps_idev->evbit[0] = BIT(EV_ABS); - input_set_abs_params(hdaps_idev, ABS_X, + idev = hdaps_idev->input; + idev->name = "hdaps"; + idev->dev.parent = &pdev->dev; + idev->evbit[0] = BIT(EV_ABS); + input_set_abs_params(idev, ABS_X, -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT); - input_set_abs_params(hdaps_idev, ABS_Y, + input_set_abs_params(idev, ABS_Y, -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT); - ret = input_register_device(hdaps_idev); + ret = input_register_polled_device(hdaps_idev); if (ret) goto out_idev; - /* start up our timer for the input device */ - init_timer(&hdaps_timer); - hdaps_timer.function = hdaps_mousedev_poll; - hdaps_timer.expires = jiffies + HDAPS_POLL_PERIOD; - add_timer(&hdaps_timer); - printk(KERN_INFO "hdaps: driver successfully loaded.\n"); return 0; out_idev: - input_free_device(hdaps_idev); + input_free_polled_device(hdaps_idev); out_group: sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group); out_device: @@ -611,8 +604,8 @@ out: static void __exit hdaps_exit(void) { - del_timer_sync(&hdaps_timer); - input_unregister_device(hdaps_idev); + input_unregister_polled_device(hdaps_idev); + input_free_polled_device(hdaps_idev); sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group); platform_device_unregister(pdev); platform_driver_unregister(&hdaps_driver); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 6545fa798b12..1b3327ad6bc4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -349,6 +349,7 @@ struct ipoib_neigh { struct sk_buff_head queue; struct neighbour *neighbour; + struct net_device *dev; struct list_head list; }; @@ -365,7 +366,8 @@ static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) INFINIBAND_ALEN, sizeof(void *)); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh); +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh, + struct net_device *dev); void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); extern struct workqueue_struct *ipoib_workqueue; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e072f3c32ce6..362610d870e4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -517,7 +517,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) struct ipoib_path *path; struct ipoib_neigh *neigh; - neigh = ipoib_neigh_alloc(skb->dst->neighbour); + neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (!neigh) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -692,9 +692,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } } else if (neigh->ah) { - if (unlikely(memcmp(&neigh->dgid.raw, + if (unlikely((memcmp(&neigh->dgid.raw, skb->dst->neighbour->ha + 4, - sizeof(union ib_gid)))) { + sizeof(union ib_gid))) || + (neigh->dev != dev))) { spin_lock(&priv->lock); /* * It's safe to call ipoib_put_ah() inside @@ -817,6 +818,13 @@ static void ipoib_neigh_cleanup(struct neighbour *n) unsigned long flags; struct ipoib_ah *ah = NULL; + neigh = *to_ipoib_neigh(n); + if (neigh) { + priv = netdev_priv(neigh->dev); + ipoib_dbg(priv, "neigh_destructor for bonding device: %s\n", + n->dev->name); + } else + return; ipoib_dbg(priv, "neigh_cleanup for %06x " IPOIB_GID_FMT "\n", IPOIB_QPN(n->ha), @@ -824,13 +832,10 @@ static void ipoib_neigh_cleanup(struct neighbour *n) spin_lock_irqsave(&priv->lock, flags); - neigh = *to_ipoib_neigh(n); - if (neigh) { - if (neigh->ah) - ah = neigh->ah; - list_del(&neigh->list); - ipoib_neigh_free(n->dev, neigh); - } + if (neigh->ah) + ah = neigh->ah; + list_del(&neigh->list); + ipoib_neigh_free(n->dev, neigh); spin_unlock_irqrestore(&priv->lock, flags); @@ -838,7 +843,8 @@ static void ipoib_neigh_cleanup(struct neighbour *n) ipoib_put_ah(ah); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, + struct net_device *dev) { struct ipoib_neigh *neigh; @@ -847,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) return NULL; neigh->neighbour = neighbour; + neigh->dev = dev; *to_ipoib_neigh(neighbour) = neigh; skb_queue_head_init(&neigh->queue); ipoib_cm_set(neigh, NULL); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 827820ec66d1..9bcfc7ad6aa6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -705,7 +705,8 @@ out: if (skb->dst && skb->dst->neighbour && !*to_ipoib_neigh(skb->dst->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour); + struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, + skb->dev); if (neigh) { kref_get(&mcast->ah->ref); diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 2d87357e2b2b..63512d906f02 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -114,28 +114,6 @@ config INPUT_JOYDEV To compile this driver as a module, choose M here: the module will be called joydev. -config INPUT_TSDEV - tristate "Touchscreen interface" - ---help--- - Say Y here if you have an application that only can understand the - Compaq touchscreen protocol for absolute pointer data. This is - useful namely for embedded configurations. - - If unsure, say N. - - To compile this driver as a module, choose M here: the - module will be called tsdev. - -config INPUT_TSDEV_SCREEN_X - int "Horizontal screen resolution" - depends on INPUT_TSDEV - default "240" - -config INPUT_TSDEV_SCREEN_Y - int "Vertical screen resolution" - depends on INPUT_TSDEV - default "320" - config INPUT_EVDEV tristate "Event interface" help diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 15eb752697b3..99af903bd3ce 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o obj-$(CONFIG_INPUT_MOUSEDEV) += mousedev.o obj-$(CONFIG_INPUT_JOYDEV) += joydev.o obj-$(CONFIG_INPUT_EVDEV) += evdev.o -obj-$(CONFIG_INPUT_TSDEV) += tsdev.o obj-$(CONFIG_INPUT_EVBUG) += evbug.o obj-$(CONFIG_INPUT_KEYBOARD) += keyboard/ diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index f1c3d6cebd58..1d62c8b88e12 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -30,6 +30,8 @@ struct evdev { wait_queue_head_t wait; struct evdev_client *grab; struct list_head client_list; + spinlock_t client_lock; /* protects client_list */ + struct mutex mutex; struct device dev; }; @@ -37,39 +39,54 @@ struct evdev_client { struct input_event buffer[EVDEV_BUFFER_SIZE]; int head; int tail; + spinlock_t buffer_lock; /* protects access to buffer, head and tail */ struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; }; static struct evdev *evdev_table[EVDEV_MINORS]; +static DEFINE_MUTEX(evdev_table_mutex); -static void evdev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) +static void evdev_pass_event(struct evdev_client *client, + struct input_event *event) +{ + /* + * Interrupts are disabled, just acquire the lock + */ + spin_lock(&client->buffer_lock); + client->buffer[client->head++] = *event; + client->head &= EVDEV_BUFFER_SIZE - 1; + spin_unlock(&client->buffer_lock); + + kill_fasync(&client->fasync, SIGIO, POLL_IN); +} + +/* + * Pass incoming event to all connected clients. + */ +static void evdev_event(struct input_handle *handle, + unsigned int type, unsigned int code, int value) { struct evdev *evdev = handle->private; struct evdev_client *client; + struct input_event event; - if (evdev->grab) { - client = evdev->grab; + do_gettimeofday(&event.time); + event.type = type; + event.code = code; + event.value = value; - do_gettimeofday(&client->buffer[client->head].time); - client->buffer[client->head].type = type; - client->buffer[client->head].code = code; - client->buffer[client->head].value = value; - client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1); + rcu_read_lock(); - kill_fasync(&client->fasync, SIGIO, POLL_IN); - } else - list_for_each_entry(client, &evdev->client_list, node) { + client = rcu_dereference(evdev->grab); + if (client) + evdev_pass_event(client, &event); + else + list_for_each_entry_rcu(client, &evdev->client_list, node) + evdev_pass_event(client, &event); - do_gettimeofday(&client->buffer[client->head].time); - client->buffer[client->head].type = type; - client->buffer[client->head].code = code; - client->buffer[client->head].value = value; - client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1); - - kill_fasync(&client->fasync, SIGIO, POLL_IN); - } + rcu_read_unlock(); wake_up_interruptible(&evdev->wait); } @@ -88,38 +105,140 @@ static int evdev_flush(struct file *file, fl_owner_t id) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; + int retval; + + retval = mutex_lock_interruptible(&evdev->mutex); + if (retval) + return retval; if (!evdev->exist) - return -ENODEV; + retval = -ENODEV; + else + retval = input_flush_device(&evdev->handle, file); - return input_flush_device(&evdev->handle, file); + mutex_unlock(&evdev->mutex); + return retval; } static void evdev_free(struct device *dev) { struct evdev *evdev = container_of(dev, struct evdev, dev); - evdev_table[evdev->minor] = NULL; kfree(evdev); } +/* + * Grabs an event device (along with underlying input device). + * This function is called with evdev->mutex taken. + */ +static int evdev_grab(struct evdev *evdev, struct evdev_client *client) +{ + int error; + + if (evdev->grab) + return -EBUSY; + + error = input_grab_device(&evdev->handle); + if (error) + return error; + + rcu_assign_pointer(evdev->grab, client); + synchronize_rcu(); + + return 0; +} + +static int evdev_ungrab(struct evdev *evdev, struct evdev_client *client) +{ + if (evdev->grab != client) + return -EINVAL; + + rcu_assign_pointer(evdev->grab, NULL); + synchronize_rcu(); + input_release_device(&evdev->handle); + + return 0; +} + +static void evdev_attach_client(struct evdev *evdev, + struct evdev_client *client) +{ + spin_lock(&evdev->client_lock); + list_add_tail_rcu(&client->node, &evdev->client_list); + spin_unlock(&evdev->client_lock); + synchronize_rcu(); +} + +static void evdev_detach_client(struct evdev *evdev, + struct evdev_client *client) +{ + spin_lock(&evdev->client_lock); + list_del_rcu(&client->node); + spin_unlock(&evdev->client_lock); + synchronize_rcu(); +} + +static int evdev_open_device(struct evdev *evdev) +{ + int retval; + + retval = mutex_lock_interruptible(&evdev->mutex); + if (retval) + return retval; + + if (!evdev->exist) + retval = -ENODEV; + else if (!evdev->open++) { + retval = input_open_device(&evdev->handle); + if (retval) + evdev->open--; + } + + mutex_unlock(&evdev->mutex); + return retval; +} + +static void evdev_close_device(struct evdev *evdev) +{ + mutex_lock(&evdev->mutex); + + if (evdev->exist && !--evdev->open) + input_close_device(&evdev->handle); + + mutex_unlock(&evdev->mutex); +} + +/* + * Wake up users waiting for IO so they can disconnect from + * dead device. + */ +static void evdev_hangup(struct evdev *evdev) +{ + struct evdev_client *client; + + spin_lock(&evdev->client_lock); + list_for_each_entry(client, &evdev->client_list, node) + kill_fasync(&client->fasync, SIGIO, POLL_HUP); + spin_unlock(&evdev->client_lock); + + wake_up_interruptible(&evdev->wait); +} + static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - if (evdev->grab == client) { - input_release_device(&evdev->handle); - evdev->grab = NULL; - } + mutex_lock(&evdev->mutex); + if (evdev->grab == client) + evdev_ungrab(evdev, client); + mutex_unlock(&evdev->mutex); evdev_fasync(-1, file, 0); - list_del(&client->node); + evdev_detach_client(evdev, client); kfree(client); - if (!--evdev->open && evdev->exist) - input_close_device(&evdev->handle); - + evdev_close_device(evdev); put_device(&evdev->dev); return 0; @@ -127,41 +246,44 @@ static int evdev_release(struct inode *inode, struct file *file) static int evdev_open(struct inode *inode, struct file *file) { - struct evdev_client *client; struct evdev *evdev; + struct evdev_client *client; int i = iminor(inode) - EVDEV_MINOR_BASE; int error; if (i >= EVDEV_MINORS) return -ENODEV; + error = mutex_lock_interruptible(&evdev_table_mutex); + if (error) + return error; evdev = evdev_table[i]; + if (evdev) + get_device(&evdev->dev); + mutex_unlock(&evdev_table_mutex); - if (!evdev || !evdev->exist) + if (!evdev) return -ENODEV; - get_device(&evdev->dev); - client = kzalloc(sizeof(struct evdev_client), GFP_KERNEL); if (!client) { error = -ENOMEM; goto err_put_evdev; } + spin_lock_init(&client->buffer_lock); client->evdev = evdev; - list_add_tail(&client->node, &evdev->client_list); + evdev_attach_client(evdev, client); - if (!evdev->open++ && evdev->exist) { - error = input_open_device(&evdev->handle); - if (error) - goto err_free_client; - } + error = evdev_open_device(evdev); + if (error) + goto err_free_client; file->private_data = client; return 0; err_free_client: - list_del(&client->node); + evdev_detach_client(evdev, client); kfree(client); err_put_evdev: put_device(&evdev->dev); @@ -197,12 +319,14 @@ static inline size_t evdev_event_size(void) sizeof(struct input_event_compat) : sizeof(struct input_event); } -static int evdev_event_from_user(const char __user *buffer, struct input_event *event) +static int evdev_event_from_user(const char __user *buffer, + struct input_event *event) { if (COMPAT_TEST) { struct input_event_compat compat_event; - if (copy_from_user(&compat_event, buffer, sizeof(struct input_event_compat))) + if (copy_from_user(&compat_event, buffer, + sizeof(struct input_event_compat))) return -EFAULT; event->time.tv_sec = compat_event.time.tv_sec; @@ -219,7 +343,8 @@ static int evdev_event_from_user(const char __user *buffer, struct input_event * return 0; } -static int evdev_event_to_user(char __user *buffer, const struct input_event *event) +static int evdev_event_to_user(char __user *buffer, + const struct input_event *event) { if (COMPAT_TEST) { struct input_event_compat compat_event; @@ -230,7 +355,8 @@ static int evdev_event_to_user(char __user *buffer, const struct input_event *ev compat_event.code = event->code; compat_event.value = event->value; - if (copy_to_user(buffer, &compat_event, sizeof(struct input_event_compat))) + if (copy_to_user(buffer, &compat_event, + sizeof(struct input_event_compat))) return -EFAULT; } else { @@ -248,7 +374,8 @@ static inline size_t evdev_event_size(void) return sizeof(struct input_event); } -static int evdev_event_from_user(const char __user *buffer, struct input_event *event) +static int evdev_event_from_user(const char __user *buffer, + struct input_event *event) { if (copy_from_user(event, buffer, sizeof(struct input_event))) return -EFAULT; @@ -256,7 +383,8 @@ static int evdev_event_from_user(const char __user *buffer, struct input_event * return 0; } -static int evdev_event_to_user(char __user *buffer, const struct input_event *event) +static int evdev_event_to_user(char __user *buffer, + const struct input_event *event) { if (copy_to_user(buffer, event, sizeof(struct input_event))) return -EFAULT; @@ -266,37 +394,71 @@ static int evdev_event_to_user(char __user *buffer, const struct input_event *ev #endif /* CONFIG_COMPAT */ -static ssize_t evdev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) +static ssize_t evdev_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; struct input_event event; - int retval = 0; + int retval; - if (!evdev->exist) - return -ENODEV; + retval = mutex_lock_interruptible(&evdev->mutex); + if (retval) + return retval; + + if (!evdev->exist) { + retval = -ENODEV; + goto out; + } while (retval < count) { - if (evdev_event_from_user(buffer + retval, &event)) - return -EFAULT; - input_inject_event(&evdev->handle, event.type, event.code, event.value); + if (evdev_event_from_user(buffer + retval, &event)) { + retval = -EFAULT; + goto out; + } + + input_inject_event(&evdev->handle, + event.type, event.code, event.value); retval += evdev_event_size(); } + out: + mutex_unlock(&evdev->mutex); return retval; } -static ssize_t evdev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +static int evdev_fetch_next_event(struct evdev_client *client, + struct input_event *event) +{ + int have_event; + + spin_lock_irq(&client->buffer_lock); + + have_event = client->head != client->tail; + if (have_event) { + *event = client->buffer[client->tail++]; + client->tail &= EVDEV_BUFFER_SIZE - 1; + } + + spin_unlock_irq(&client->buffer_lock); + + return have_event; +} + +static ssize_t evdev_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; + struct input_event event; int retval; if (count < evdev_event_size()) return -EINVAL; - if (client->head == client->tail && evdev->exist && (file->f_flags & O_NONBLOCK)) + if (client->head == client->tail && evdev->exist && + (file->f_flags & O_NONBLOCK)) return -EAGAIN; retval = wait_event_interruptible(evdev->wait, @@ -307,14 +469,12 @@ static ssize_t evdev_read(struct file *file, char __user *buffer, size_t count, if (!evdev->exist) return -ENODEV; - while (client->head != client->tail && retval + evdev_event_size() <= count) { - - struct input_event *event = (struct input_event *) client->buffer + client->tail; + while (retval + evdev_event_size() <= count && + evdev_fetch_next_event(client, &event)) { - if (evdev_event_to_user(buffer + retval, event)) + if (evdev_event_to_user(buffer + retval, &event)) return -EFAULT; - client->tail = (client->tail + 1) & (EVDEV_BUFFER_SIZE - 1); retval += evdev_event_size(); } @@ -409,8 +569,8 @@ static int str_to_user(const char *str, unsigned int maxlen, void __user *p) return copy_to_user(p, str, len) ? -EFAULT : len; } -static long evdev_ioctl_handler(struct file *file, unsigned int cmd, - void __user *p, int compat_mode) +static long evdev_do_ioctl(struct file *file, unsigned int cmd, + void __user *p, int compat_mode) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; @@ -421,215 +581,289 @@ static long evdev_ioctl_handler(struct file *file, unsigned int cmd, int i, t, u, v; int error; - if (!evdev->exist) - return -ENODEV; - switch (cmd) { - case EVIOCGVERSION: - return put_user(EV_VERSION, ip); + case EVIOCGVERSION: + return put_user(EV_VERSION, ip); - case EVIOCGID: - if (copy_to_user(p, &dev->id, sizeof(struct input_id))) - return -EFAULT; - return 0; + case EVIOCGID: + if (copy_to_user(p, &dev->id, sizeof(struct input_id))) + return -EFAULT; + return 0; - case EVIOCGREP: - if (!test_bit(EV_REP, dev->evbit)) - return -ENOSYS; - if (put_user(dev->rep[REP_DELAY], ip)) - return -EFAULT; - if (put_user(dev->rep[REP_PERIOD], ip + 1)) - return -EFAULT; - return 0; + case EVIOCGREP: + if (!test_bit(EV_REP, dev->evbit)) + return -ENOSYS; + if (put_user(dev->rep[REP_DELAY], ip)) + return -EFAULT; + if (put_user(dev->rep[REP_PERIOD], ip + 1)) + return -EFAULT; + return 0; - case EVIOCSREP: - if (!test_bit(EV_REP, dev->evbit)) - return -ENOSYS; - if (get_user(u, ip)) - return -EFAULT; - if (get_user(v, ip + 1)) - return -EFAULT; + case EVIOCSREP: + if (!test_bit(EV_REP, dev->evbit)) + return -ENOSYS; + if (get_user(u, ip)) + return -EFAULT; + if (get_user(v, ip + 1)) + return -EFAULT; - input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u); - input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v); + input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u); + input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v); - return 0; + return 0; - case EVIOCGKEYCODE: - if (get_user(t, ip)) - return -EFAULT; + case EVIOCGKEYCODE: + if (get_user(t, ip)) + return -EFAULT; - error = dev->getkeycode(dev, t, &v); - if (error) - return error; + error = dev->getkeycode(dev, t, &v); + if (error) + return error; - if (put_user(v, ip + 1)) - return -EFAULT; + if (put_user(v, ip + 1)) + return -EFAULT; - return 0; + return 0; - case EVIOCSKEYCODE: - if (get_user(t, ip) || get_user(v, ip + 1)) - return -EFAULT; + case EVIOCSKEYCODE: + if (get_user(t, ip) || get_user(v, ip + 1)) + return -EFAULT; - return dev->setkeycode(dev, t, v); + return dev->setkeycode(dev, t, v); - case EVIOCSFF: - if (copy_from_user(&effect, p, sizeof(effect))) - return -EFAULT; + case EVIOCSFF: + if (copy_from_user(&effect, p, sizeof(effect))) + return -EFAULT; - error = input_ff_upload(dev, &effect, file); + error = input_ff_upload(dev, &effect, file); - if (put_user(effect.id, &(((struct ff_effect __user *)p)->id))) - return -EFAULT; + if (put_user(effect.id, &(((struct ff_effect __user *)p)->id))) + return -EFAULT; - return error; + return error; - case EVIOCRMFF: - return input_ff_erase(dev, (int)(unsigned long) p, file); + case EVIOCRMFF: + return input_ff_erase(dev, (int)(unsigned long) p, file); - case EVIOCGEFFECTS: - i = test_bit(EV_FF, dev->evbit) ? dev->ff->max_effects : 0; - if (put_user(i, ip)) - return -EFAULT; - return 0; - - case EVIOCGRAB: - if (p) { - if (evdev->grab) - return -EBUSY; - if (input_grab_device(&evdev->handle)) - return -EBUSY; - evdev->grab = client; - return 0; - } else { - if (evdev->grab != client) - return -EINVAL; - input_release_device(&evdev->handle); - evdev->grab = NULL; - return 0; - } + case EVIOCGEFFECTS: + i = test_bit(EV_FF, dev->evbit) ? + dev->ff->max_effects : 0; + if (put_user(i, ip)) + return -EFAULT; + return 0; + + case EVIOCGRAB: + if (p) + return evdev_grab(evdev, client); + else + return evdev_ungrab(evdev, client); - default: + default: - if (_IOC_TYPE(cmd) != 'E') - return -EINVAL; + if (_IOC_TYPE(cmd) != 'E') + return -EINVAL; - if (_IOC_DIR(cmd) == _IOC_READ) { + if (_IOC_DIR(cmd) == _IOC_READ) { - if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) { + if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0, 0))) { - unsigned long *bits; - int len; + unsigned long *bits; + int len; - switch (_IOC_NR(cmd) & EV_MAX) { - case 0: bits = dev->evbit; len = EV_MAX; break; - case EV_KEY: bits = dev->keybit; len = KEY_MAX; break; - case EV_REL: bits = dev->relbit; len = REL_MAX; break; - case EV_ABS: bits = dev->absbit; len = ABS_MAX; break; - case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break; - case EV_LED: bits = dev->ledbit; len = LED_MAX; break; - case EV_SND: bits = dev->sndbit; len = SND_MAX; break; - case EV_FF: bits = dev->ffbit; len = FF_MAX; break; - case EV_SW: bits = dev->swbit; len = SW_MAX; break; - default: return -EINVAL; - } - return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode); - } + switch (_IOC_NR(cmd) & EV_MAX) { - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) - return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd), - p, compat_mode); + case 0: bits = dev->evbit; len = EV_MAX; break; + case EV_KEY: bits = dev->keybit; len = KEY_MAX; break; + case EV_REL: bits = dev->relbit; len = REL_MAX; break; + case EV_ABS: bits = dev->absbit; len = ABS_MAX; break; + case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break; + case EV_LED: bits = dev->ledbit; len = LED_MAX; break; + case EV_SND: bits = dev->sndbit; len = SND_MAX; break; + case EV_FF: bits = dev->ffbit; len = FF_MAX; break; + case EV_SW: bits = dev->swbit; len = SW_MAX; break; + default: return -EINVAL; + } + return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode); + } - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) - return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd), - p, compat_mode); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) + return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) - return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd), - p, compat_mode); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) + return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0))) - return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd), - p, compat_mode); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) + return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) - return str_to_user(dev->name, _IOC_SIZE(cmd), p); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0))) + return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) - return str_to_user(dev->phys, _IOC_SIZE(cmd), p); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) + return str_to_user(dev->name, _IOC_SIZE(cmd), p); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) - return str_to_user(dev->uniq, _IOC_SIZE(cmd), p); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) + return str_to_user(dev->phys, _IOC_SIZE(cmd), p); - if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) { + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) + return str_to_user(dev->uniq, _IOC_SIZE(cmd), p); - t = _IOC_NR(cmd) & ABS_MAX; + if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) { - abs.value = dev->abs[t]; - abs.minimum = dev->absmin[t]; - abs.maximum = dev->absmax[t]; - abs.fuzz = dev->absfuzz[t]; - abs.flat = dev->absflat[t]; + t = _IOC_NR(cmd) & ABS_MAX; - if (copy_to_user(p, &abs, sizeof(struct input_absinfo))) - return -EFAULT; + abs.value = dev->abs[t]; + abs.minimum = dev->absmin[t]; + abs.maximum = dev->absmax[t]; + abs.fuzz = dev->absfuzz[t]; + abs.flat = dev->absflat[t]; - return 0; - } + if (copy_to_user(p, &abs, sizeof(struct input_absinfo))) + return -EFAULT; + return 0; } - if (_IOC_DIR(cmd) == _IOC_WRITE) { + } + + if (_IOC_DIR(cmd) == _IOC_WRITE) { - if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) { + if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) { - t = _IOC_NR(cmd) & ABS_MAX; + t = _IOC_NR(cmd) & ABS_MAX; - if (copy_from_user(&abs, p, sizeof(struct input_absinfo))) - return -EFAULT; + if (copy_from_user(&abs, p, + sizeof(struct input_absinfo))) + return -EFAULT; - dev->abs[t] = abs.value; - dev->absmin[t] = abs.minimum; - dev->absmax[t] = abs.maximum; - dev->absfuzz[t] = abs.fuzz; - dev->absflat[t] = abs.flat; + /* + * Take event lock to ensure that we are not + * changing device parameters in the middle + * of event. + */ + spin_lock_irq(&dev->event_lock); - return 0; - } + dev->abs[t] = abs.value; + dev->absmin[t] = abs.minimum; + dev->absmax[t] = abs.maximum; + dev->absfuzz[t] = abs.fuzz; + dev->absflat[t] = abs.flat; + + spin_unlock_irq(&dev->event_lock); + + return 0; } + } } return -EINVAL; } +static long evdev_ioctl_handler(struct file *file, unsigned int cmd, + void __user *p, int compat_mode) +{ + struct evdev_client *client = file->private_data; + struct evdev *evdev = client->evdev; + int retval; + + retval = mutex_lock_interruptible(&evdev->mutex); + if (retval) + return retval; + + if (!evdev->exist) { + retval = -ENODEV; + goto out; + } + + retval = evdev_do_ioctl(file, cmd, p, compat_mode); + + out: + mutex_unlock(&evdev->mutex); + return retval; +} + static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return evdev_ioctl_handler(file, cmd, (void __user *)arg, 0); } #ifdef CONFIG_COMPAT -static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) +static long evdev_ioctl_compat(struct file *file, + unsigned int cmd, unsigned long arg) { return evdev_ioctl_handler(file, cmd, compat_ptr(arg), 1); } #endif static const struct file_operations evdev_fops = { - .owner = THIS_MODULE, - .read = evdev_read, - .write = evdev_write, - .poll = evdev_poll, - .open = evdev_open, - .release = evdev_release, - .unlocked_ioctl = evdev_ioctl, + .owner = THIS_MODULE, + .read = evdev_read, + .write = evdev_write, + .poll = evdev_poll, + .open = evdev_open, + .release = evdev_release, + .unlocked_ioctl = evdev_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = evdev_ioctl_compat, + .compat_ioctl = evdev_ioctl_compat, #endif - .fasync = evdev_fasync, - .flush = evdev_flush + .fasync = evdev_fasync, + .flush = evdev_flush }; +static int evdev_install_chrdev(struct evdev *evdev) +{ + /* + * No need to do any locking here as calls to connect and + * disconnect are serialized by the input core + */ + evdev_table[evdev->minor] = evdev; + return 0; +} + +static void evdev_remove_chrdev(struct evdev *evdev) +{ + /* + * Lock evdev table to prevent race with evdev_open() + */ + mutex_lock(&evdev_table_mutex); + evdev_table[evdev->minor] = NULL; + mutex_unlock(&evdev_table_mutex); +} + +/* + * Mark device non-existent. This disables writes, ioctls and + * prevents new users from opening the device. Already posted + * blocking reads will stay, however new ones will fail. + */ +static void evdev_mark_dead(struct evdev *evdev) +{ + mutex_lock(&evdev->mutex); + evdev->exist = 0; + mutex_unlock(&evdev->mutex); +} + +static void evdev_cleanup(struct evdev *evdev) +{ + struct input_handle *handle = &evdev->handle; + + evdev_mark_dead(evdev); + evdev_hangup(evdev); + evdev_remove_chrdev(evdev); + + /* evdev is marked dead so no one else accesses evdev->open */ + if (evdev->open) { + input_flush_device(handle, NULL); + input_close_device(handle); + } +} + +/* + * Create new evdev device. Note that input core serializes calls + * to connect and disconnect so we don't need to lock evdev_table here. + */ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { @@ -637,7 +871,10 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, int minor; int error; - for (minor = 0; minor < EVDEV_MINORS && evdev_table[minor]; minor++); + for (minor = 0; minor < EVDEV_MINORS; minor++) + if (!evdev_table[minor]) + break; + if (minor == EVDEV_MINORS) { printk(KERN_ERR "evdev: no more free evdev devices\n"); return -ENFILE; @@ -648,38 +885,44 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, return -ENOMEM; INIT_LIST_HEAD(&evdev->client_list); + spin_lock_init(&evdev->client_lock); + mutex_init(&evdev->mutex); init_waitqueue_head(&evdev->wait); + snprintf(evdev->name, sizeof(evdev->name), "event%d", minor); evdev->exist = 1; evdev->minor = minor; + evdev->handle.dev = dev; evdev->handle.name = evdev->name; evdev->handle.handler = handler; evdev->handle.private = evdev; - snprintf(evdev->name, sizeof(evdev->name), "event%d", minor); - snprintf(evdev->dev.bus_id, sizeof(evdev->dev.bus_id), - "event%d", minor); + strlcpy(evdev->dev.bus_id, evdev->name, sizeof(evdev->dev.bus_id)); + evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor); evdev->dev.class = &input_class; evdev->dev.parent = &dev->dev; - evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor); evdev->dev.release = evdev_free; device_initialize(&evdev->dev); - evdev_table[minor] = evdev; - - error = device_add(&evdev->dev); + error = input_register_handle(&evdev->handle); if (error) goto err_free_evdev; - error = input_register_handle(&evdev->handle); + error = evdev_install_chrdev(evdev); + if (error) + goto err_unregister_handle; + + error = device_add(&evdev->dev); if (error) - goto err_delete_evdev; + goto err_cleanup_evdev; return 0; - err_delete_evdev: - device_del(&evdev->dev); + err_cleanup_evdev: + evdev_cleanup(evdev); + err_unregister_handle: + input_unregister_handle(&evdev->handle); err_free_evdev: put_device(&evdev->dev); return error; @@ -688,21 +931,10 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, static void evdev_disconnect(struct input_handle *handle) { struct evdev *evdev = handle->private; - struct evdev_client *client; - input_unregister_handle(handle); device_del(&evdev->dev); - - evdev->exist = 0; - - if (evdev->open) { - input_flush_device(handle, NULL); - input_close_device(handle); - list_for_each_entry(client, &evdev->client_list, node) - kill_fasync(&client->fasync, SIGIO, POLL_HUP); - wake_up_interruptible(&evdev->wait); - } - + evdev_cleanup(evdev); + input_unregister_handle(handle); put_device(&evdev->dev); } @@ -714,13 +946,13 @@ static const struct input_device_id evdev_ids[] = { MODULE_DEVICE_TABLE(input, evdev_ids); static struct input_handler evdev_handler = { - .event = evdev_event, - .connect = evdev_connect, - .disconnect = evdev_disconnect, - .fops = &evdev_fops, - .minor = EVDEV_MINOR_BASE, - .name = "evdev", - .id_table = evdev_ids, + .event = evdev_event, + .connect = evdev_connect, + .disconnect = evdev_disconnect, + .fops = &evdev_fops, + .minor = EVDEV_MINOR_BASE, + .name = "evdev", + .id_table = evdev_ids, }; static int __init evdev_init(void) diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c index b773d4c756a6..92b359894e81 100644 --- a/drivers/input/input-polldev.c +++ b/drivers/input/input-polldev.c @@ -70,6 +70,7 @@ static int input_open_polled_device(struct input_dev *input) { struct input_polled_dev *dev = input->private; int error; + unsigned long ticks; error = input_polldev_start_workqueue(); if (error) @@ -78,8 +79,10 @@ static int input_open_polled_device(struct input_dev *input) if (dev->flush) dev->flush(dev); - queue_delayed_work(polldev_wq, &dev->work, - msecs_to_jiffies(dev->poll_interval)); + ticks = msecs_to_jiffies(dev->poll_interval); + if (ticks >= HZ) + ticks = round_jiffies(ticks); + queue_delayed_work(polldev_wq, &dev->work, ticks); return 0; } diff --git a/drivers/input/input.c b/drivers/input/input.c index 5dc361c954e2..2f2b020cd629 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -17,10 +17,10 @@ #include <linux/major.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/interrupt.h> #include <linux/poll.h> #include <linux/device.h> #include <linux/mutex.h> +#include <linux/rcupdate.h> MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>"); MODULE_DESCRIPTION("Input core"); @@ -31,167 +31,245 @@ MODULE_LICENSE("GPL"); static LIST_HEAD(input_dev_list); static LIST_HEAD(input_handler_list); +/* + * input_mutex protects access to both input_dev_list and input_handler_list. + * This also causes input_[un]register_device and input_[un]register_handler + * be mutually exclusive which simplifies locking in drivers implementing + * input handlers. + */ +static DEFINE_MUTEX(input_mutex); + static struct input_handler *input_table[8]; -/** - * input_event() - report new input event - * @dev: device that generated the event - * @type: type of the event - * @code: event code - * @value: value of the event - * - * This function should be used by drivers implementing various input devices - * See also input_inject_event() - */ -void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) +static inline int is_event_supported(unsigned int code, + unsigned long *bm, unsigned int max) { - struct input_handle *handle; + return code <= max && test_bit(code, bm); +} - if (type > EV_MAX || !test_bit(type, dev->evbit)) - return; +static int input_defuzz_abs_event(int value, int old_val, int fuzz) +{ + if (fuzz) { + if (value > old_val - fuzz / 2 && value < old_val + fuzz / 2) + return old_val; - add_input_randomness(type, code, value); + if (value > old_val - fuzz && value < old_val + fuzz) + return (old_val * 3 + value) / 4; - switch (type) { - - case EV_SYN: - switch (code) { - case SYN_CONFIG: - if (dev->event) - dev->event(dev, type, code, value); - break; - - case SYN_REPORT: - if (dev->sync) - return; - dev->sync = 1; - break; - } - break; + if (value > old_val - fuzz * 2 && value < old_val + fuzz * 2) + return (old_val + value) / 2; + } - case EV_KEY: + return value; +} - if (code > KEY_MAX || !test_bit(code, dev->keybit) || !!test_bit(code, dev->key) == value) - return; +/* + * Pass event through all open handles. This function is called with + * dev->event_lock held and interrupts disabled. + */ +static void input_pass_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + struct input_handle *handle; - if (value == 2) - break; + rcu_read_lock(); - change_bit(code, dev->key); + handle = rcu_dereference(dev->grab); + if (handle) + handle->handler->event(handle, type, code, value); + else + list_for_each_entry_rcu(handle, &dev->h_list, d_node) + if (handle->open) + handle->handler->event(handle, + type, code, value); + rcu_read_unlock(); +} - if (test_bit(EV_REP, dev->evbit) && dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && dev->timer.data && value) { - dev->repeat_key = code; - mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); - } +/* + * Generate software autorepeat event. Note that we take + * dev->event_lock here to avoid racing with input_event + * which may cause keys get "stuck". + */ +static void input_repeat_key(unsigned long data) +{ + struct input_dev *dev = (void *) data; + unsigned long flags; - break; + spin_lock_irqsave(&dev->event_lock, flags); - case EV_SW: + if (test_bit(dev->repeat_key, dev->key) && + is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) { - if (code > SW_MAX || !test_bit(code, dev->swbit) || !!test_bit(code, dev->sw) == value) - return; + input_pass_event(dev, EV_KEY, dev->repeat_key, 2); - change_bit(code, dev->sw); + if (dev->sync) { + /* + * Only send SYN_REPORT if we are not in a middle + * of driver parsing a new hardware packet. + * Otherwise assume that the driver will send + * SYN_REPORT once it's done. + */ + input_pass_event(dev, EV_SYN, SYN_REPORT, 1); + } - break; + if (dev->rep[REP_PERIOD]) + mod_timer(&dev->timer, jiffies + + msecs_to_jiffies(dev->rep[REP_PERIOD])); + } - case EV_ABS: + spin_unlock_irqrestore(&dev->event_lock, flags); +} - if (code > ABS_MAX || !test_bit(code, dev->absbit)) - return; +static void input_start_autorepeat(struct input_dev *dev, int code) +{ + if (test_bit(EV_REP, dev->evbit) && + dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && + dev->timer.data) { + dev->repeat_key = code; + mod_timer(&dev->timer, + jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); + } +} - if (dev->absfuzz[code]) { - if ((value > dev->abs[code] - (dev->absfuzz[code] >> 1)) && - (value < dev->abs[code] + (dev->absfuzz[code] >> 1))) - return; +#define INPUT_IGNORE_EVENT 0 +#define INPUT_PASS_TO_HANDLERS 1 +#define INPUT_PASS_TO_DEVICE 2 +#define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE) - if ((value > dev->abs[code] - dev->absfuzz[code]) && - (value < dev->abs[code] + dev->absfuzz[code])) - value = (dev->abs[code] * 3 + value) >> 2; +static void input_handle_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + int disposition = INPUT_IGNORE_EVENT; - if ((value > dev->abs[code] - (dev->absfuzz[code] << 1)) && - (value < dev->abs[code] + (dev->absfuzz[code] << 1))) - value = (dev->abs[code] + value) >> 1; - } + switch (type) { - if (dev->abs[code] == value) - return; + case EV_SYN: + switch (code) { + case SYN_CONFIG: + disposition = INPUT_PASS_TO_ALL; + break; - dev->abs[code] = value; + case SYN_REPORT: + if (!dev->sync) { + dev->sync = 1; + disposition = INPUT_PASS_TO_HANDLERS; + } break; + } + break; - case EV_REL: + case EV_KEY: + if (is_event_supported(code, dev->keybit, KEY_MAX) && + !!test_bit(code, dev->key) != value) { - if (code > REL_MAX || !test_bit(code, dev->relbit) || (value == 0)) - return; + if (value != 2) { + __change_bit(code, dev->key); + if (value) + input_start_autorepeat(dev, code); + } - break; + disposition = INPUT_PASS_TO_HANDLERS; + } + break; - case EV_MSC: + case EV_SW: + if (is_event_supported(code, dev->swbit, SW_MAX) && + !!test_bit(code, dev->sw) != value) { - if (code > MSC_MAX || !test_bit(code, dev->mscbit)) - return; + __change_bit(code, dev->sw); + disposition = INPUT_PASS_TO_HANDLERS; + } + break; - if (dev->event) - dev->event(dev, type, code, value); + case EV_ABS: + if (is_event_supported(code, dev->absbit, ABS_MAX)) { - break; + value = input_defuzz_abs_event(value, + dev->abs[code], dev->absfuzz[code]); + + if (dev->abs[code] != value) { + dev->abs[code] = value; + disposition = INPUT_PASS_TO_HANDLERS; + } + } + break; - case EV_LED: + case EV_REL: + if (is_event_supported(code, dev->relbit, REL_MAX) && value) + disposition = INPUT_PASS_TO_HANDLERS; - if (code > LED_MAX || !test_bit(code, dev->ledbit) || !!test_bit(code, dev->led) == value) - return; + break; - change_bit(code, dev->led); + case EV_MSC: + if (is_event_supported(code, dev->mscbit, MSC_MAX)) + disposition = INPUT_PASS_TO_ALL; - if (dev->event) - dev->event(dev, type, code, value); + break; - break; + case EV_LED: + if (is_event_supported(code, dev->ledbit, LED_MAX) && + !!test_bit(code, dev->led) != value) { - case EV_SND: + __change_bit(code, dev->led); + disposition = INPUT_PASS_TO_ALL; + } + break; - if (code > SND_MAX || !test_bit(code, dev->sndbit)) - return; + case EV_SND: + if (is_event_supported(code, dev->sndbit, SND_MAX)) { if (!!test_bit(code, dev->snd) != !!value) - change_bit(code, dev->snd); + __change_bit(code, dev->snd); + disposition = INPUT_PASS_TO_ALL; + } + break; - if (dev->event) - dev->event(dev, type, code, value); + case EV_REP: + if (code <= REP_MAX && value >= 0 && dev->rep[code] != value) { + dev->rep[code] = value; + disposition = INPUT_PASS_TO_ALL; + } + break; - break; + case EV_FF: + if (value >= 0) + disposition = INPUT_PASS_TO_ALL; + break; + } - case EV_REP: + if (type != EV_SYN) + dev->sync = 0; - if (code > REP_MAX || value < 0 || dev->rep[code] == value) - return; + if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) + dev->event(dev, type, code, value); - dev->rep[code] = value; - if (dev->event) - dev->event(dev, type, code, value); + if (disposition & INPUT_PASS_TO_HANDLERS) + input_pass_event(dev, type, code, value); +} - break; +/** + * input_event() - report new input event + * @dev: device that generated the event + * @type: type of the event + * @code: event code + * @value: value of the event + * + * This function should be used by drivers implementing various input + * devices. See also input_inject_event(). + */ - case EV_FF: +void input_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + unsigned long flags; - if (value < 0) - return; + if (is_event_supported(type, dev->evbit, EV_MAX)) { - if (dev->event) - dev->event(dev, type, code, value); - break; + spin_lock_irqsave(&dev->event_lock, flags); + add_input_randomness(type, code, value); + input_handle_event(dev, type, code, value); + spin_unlock_irqrestore(&dev->event_lock, flags); } - - if (type != EV_SYN) - dev->sync = 0; - - if (dev->grab) - dev->grab->handler->event(dev->grab, type, code, value); - else - list_for_each_entry(handle, &dev->h_list, d_node) - if (handle->open) - handle->handler->event(handle, type, code, value); } EXPORT_SYMBOL(input_event); @@ -202,102 +280,228 @@ EXPORT_SYMBOL(input_event); * @code: event code * @value: value of the event * - * Similar to input_event() but will ignore event if device is "grabbed" and handle - * injecting event is not the one that owns the device. + * Similar to input_event() but will ignore event if device is + * "grabbed" and handle injecting event is not the one that owns + * the device. */ -void input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) -{ - if (!handle->dev->grab || handle->dev->grab == handle) - input_event(handle->dev, type, code, value); -} -EXPORT_SYMBOL(input_inject_event); - -static void input_repeat_key(unsigned long data) +void input_inject_event(struct input_handle *handle, + unsigned int type, unsigned int code, int value) { - struct input_dev *dev = (void *) data; + struct input_dev *dev = handle->dev; + struct input_handle *grab; + unsigned long flags; - if (!test_bit(dev->repeat_key, dev->key)) - return; + if (is_event_supported(type, dev->evbit, EV_MAX)) { + spin_lock_irqsave(&dev->event_lock, flags); - input_event(dev, EV_KEY, dev->repeat_key, 2); - input_sync(dev); + rcu_read_lock(); + grab = rcu_dereference(dev->grab); + if (!grab || grab == handle) + input_handle_event(dev, type, code, value); + rcu_read_unlock(); - if (dev->rep[REP_PERIOD]) - mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_PERIOD])); + spin_unlock_irqrestore(&dev->event_lock, flags); + } } +EXPORT_SYMBOL(input_inject_event); +/** + * input_grab_device - grabs device for exclusive use + * @handle: input handle that wants to own the device + * + * When a device is grabbed by an input handle all events generated by + * the device are delivered only to this handle. Also events injected + * by other input handles are ignored while device is grabbed. + */ int input_grab_device(struct input_handle *handle) { - if (handle->dev->grab) - return -EBUSY; + struct input_dev *dev = handle->dev; + int retval; - handle->dev->grab = handle; - return 0; + retval = mutex_lock_interruptible(&dev->mutex); + if (retval) + return retval; + + if (dev->grab) { + retval = -EBUSY; + goto out; + } + + rcu_assign_pointer(dev->grab, handle); + synchronize_rcu(); + + out: + mutex_unlock(&dev->mutex); + return retval; } EXPORT_SYMBOL(input_grab_device); -void input_release_device(struct input_handle *handle) +static void __input_release_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; if (dev->grab == handle) { - dev->grab = NULL; + rcu_assign_pointer(dev->grab, NULL); + /* Make sure input_pass_event() notices that grab is gone */ + synchronize_rcu(); list_for_each_entry(handle, &dev->h_list, d_node) - if (handle->handler->start) + if (handle->open && handle->handler->start) handle->handler->start(handle); } } + +/** + * input_release_device - release previously grabbed device + * @handle: input handle that owns the device + * + * Releases previously grabbed device so that other input handles can + * start receiving input events. Upon release all handlers attached + * to the device have their start() method called so they have a change + * to synchronize device state with the rest of the system. + */ +void input_release_device(struct input_handle *handle) +{ + struct input_dev *dev = handle->dev; + + mutex_lock(&dev->mutex); + __input_release_device(handle); + mutex_unlock(&dev->mutex); +} EXPORT_SYMBOL(input_release_device); +/** + * input_open_device - open input device + * @handle: handle through which device is being accessed + * + * This function should be called by input handlers when they + * want to start receive events from given input device. + */ int input_open_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; - int err; + int retval; - err = mutex_lock_interruptible(&dev->mutex); - if (err) - return err; + retval = mutex_lock_interruptible(&dev->mutex); + if (retval) + return retval; + + if (dev->going_away) { + retval = -ENODEV; + goto out; + } handle->open++; if (!dev->users++ && dev->open) - err = dev->open(dev); - - if (err) - handle->open--; + retval = dev->open(dev); + + if (retval) { + dev->users--; + if (!--handle->open) { + /* + * Make sure we are not delivering any more events + * through this handle + */ + synchronize_rcu(); + } + } + out: mutex_unlock(&dev->mutex); - - return err; + return retval; } EXPORT_SYMBOL(input_open_device); -int input_flush_device(struct input_handle* handle, struct file* file) +int input_flush_device(struct input_handle *handle, struct file *file) { - if (handle->dev->flush) - return handle->dev->flush(handle->dev, file); + struct input_dev *dev = handle->dev; + int retval; - return 0; + retval = mutex_lock_interruptible(&dev->mutex); + if (retval) + return retval; + + if (dev->flush) + retval = dev->flush(dev, file); + + mutex_unlock(&dev->mutex); + return retval; } EXPORT_SYMBOL(input_flush_device); +/** + * input_close_device - close input device + * @handle: handle through which device is being accessed + * + * This function should be called by input handlers when they + * want to stop receive events from given input device. + */ void input_close_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; - input_release_device(handle); - mutex_lock(&dev->mutex); + __input_release_device(handle); + if (!--dev->users && dev->close) dev->close(dev); - handle->open--; + + if (!--handle->open) { + /* + * synchronize_rcu() makes sure that input_pass_event() + * completed and that no more input events are delivered + * through this handle + */ + synchronize_rcu(); + } mutex_unlock(&dev->mutex); } EXPORT_SYMBOL(input_close_device); +/* + * Prepare device for unregistering + */ +static void input_disconnect_device(struct input_dev *dev) +{ + struct input_handle *handle; + int code; + + /* + * Mark device as going away. Note that we take dev->mutex here + * not to protect access to dev->going_away but rather to ensure + * that there are no threads in the middle of input_open_device() + */ + mutex_lock(&dev->mutex); + dev->going_away = 1; + mutex_unlock(&dev->mutex); + + spin_lock_irq(&dev->event_lock); + + /* + * Simulate keyup events for all pressed keys so that handlers + * are not left with "stuck" keys. The driver may continue + * generate events even after we done here but they will not + * reach any handlers. + */ + if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) { + for (code = 0; code <= KEY_MAX; code++) { + if (is_event_supported(code, dev->keybit, KEY_MAX) && + test_bit(code, dev->key)) { + input_pass_event(dev, EV_KEY, code, 0); + } + } + input_pass_event(dev, EV_SYN, SYN_REPORT, 1); + } + + list_for_each_entry(handle, &dev->h_list, d_node) + handle->open = 0; + + spin_unlock_irq(&dev->event_lock); +} + static int input_fetch_keycode(struct input_dev *dev, int scancode) { switch (dev->keycodesize) { @@ -473,7 +677,8 @@ static unsigned int input_proc_devices_poll(struct file *file, poll_table *wait) static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos) { - /* acquire lock here ... Yes, we do need locking, I knowi, I know... */ + if (mutex_lock_interruptible(&input_mutex)) + return NULL; return seq_list_start(&input_dev_list, *pos); } @@ -485,7 +690,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void input_devices_seq_stop(struct seq_file *seq, void *v) { - /* release lock here */ + mutex_unlock(&input_mutex); } static void input_seq_print_bitmap(struct seq_file *seq, const char *name, @@ -569,7 +774,9 @@ static const struct file_operations input_devices_fileops = { static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) { - /* acquire lock here ... Yes, we do need locking, I knowi, I know... */ + if (mutex_lock_interruptible(&input_mutex)) + return NULL; + seq->private = (void *)(unsigned long)*pos; return seq_list_start(&input_handler_list, *pos); } @@ -582,7 +789,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void input_handlers_seq_stop(struct seq_file *seq, void *v) { - /* release lock here */ + mutex_unlock(&input_mutex); } static int input_handlers_seq_show(struct seq_file *seq, void *v) @@ -983,6 +1190,7 @@ struct input_dev *input_allocate_device(void) dev->dev.class = &input_class; device_initialize(&dev->dev); mutex_init(&dev->mutex); + spin_lock_init(&dev->event_lock); INIT_LIST_HEAD(&dev->h_list); INIT_LIST_HEAD(&dev->node); @@ -1000,7 +1208,7 @@ EXPORT_SYMBOL(input_allocate_device); * This function should only be used if input_register_device() * was not called yet or if it failed. Once device was registered * use input_unregister_device() and memory will be freed once last - * refrence to the device is dropped. + * reference to the device is dropped. * * Device should be allocated by input_allocate_device(). * @@ -1070,6 +1278,18 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int } EXPORT_SYMBOL(input_set_capability); +/** + * input_register_device - register device with input core + * @dev: device to be registered + * + * This function registers device with input core. The device must be + * allocated with input_allocate_device() and all it's capabilities + * set up before registering. + * If function fails the device must be freed with input_free_device(). + * Once device has been successfully registered it can be unregistered + * with input_unregister_device(); input_free_device() should not be + * called in this case. + */ int input_register_device(struct input_dev *dev) { static atomic_t input_no = ATOMIC_INIT(0); @@ -1077,7 +1297,7 @@ int input_register_device(struct input_dev *dev) const char *path; int error; - set_bit(EV_SYN, dev->evbit); + __set_bit(EV_SYN, dev->evbit); /* * If delay and period are pre-set by the driver, then autorepeating @@ -1098,8 +1318,6 @@ int input_register_device(struct input_dev *dev) if (!dev->setkeycode) dev->setkeycode = input_default_setkeycode; - list_add_tail(&dev->node, &input_dev_list); - snprintf(dev->dev.bus_id, sizeof(dev->dev.bus_id), "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1); @@ -1115,49 +1333,79 @@ int input_register_device(struct input_dev *dev) dev->name ? dev->name : "Unspecified device", path ? path : "N/A"); kfree(path); + error = mutex_lock_interruptible(&input_mutex); + if (error) { + device_del(&dev->dev); + return error; + } + + list_add_tail(&dev->node, &input_dev_list); + list_for_each_entry(handler, &input_handler_list, node) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); + mutex_unlock(&input_mutex); + return 0; } EXPORT_SYMBOL(input_register_device); +/** + * input_unregister_device - unregister previously registered device + * @dev: device to be unregistered + * + * This function unregisters an input device. Once device is unregistered + * the caller should not try to access it as it may get freed at any moment. + */ void input_unregister_device(struct input_dev *dev) { struct input_handle *handle, *next; - int code; - for (code = 0; code <= KEY_MAX; code++) - if (test_bit(code, dev->key)) - input_report_key(dev, code, 0); - input_sync(dev); + input_disconnect_device(dev); - del_timer_sync(&dev->timer); + mutex_lock(&input_mutex); list_for_each_entry_safe(handle, next, &dev->h_list, d_node) handle->handler->disconnect(handle); WARN_ON(!list_empty(&dev->h_list)); + del_timer_sync(&dev->timer); list_del_init(&dev->node); - device_unregister(&dev->dev); - input_wakeup_procfs_readers(); + + mutex_unlock(&input_mutex); + + device_unregister(&dev->dev); } EXPORT_SYMBOL(input_unregister_device); +/** + * input_register_handler - register a new input handler + * @handler: handler to be registered + * + * This function registers a new input handler (interface) for input + * devices in the system and attaches it to all input devices that + * are compatible with the handler. + */ int input_register_handler(struct input_handler *handler) { struct input_dev *dev; + int retval; + + retval = mutex_lock_interruptible(&input_mutex); + if (retval) + return retval; INIT_LIST_HEAD(&handler->h_list); if (handler->fops != NULL) { - if (input_table[handler->minor >> 5]) - return -EBUSY; - + if (input_table[handler->minor >> 5]) { + retval = -EBUSY; + goto out; + } input_table[handler->minor >> 5] = handler; } @@ -1167,14 +1415,26 @@ int input_register_handler(struct input_handler *handler) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); - return 0; + + out: + mutex_unlock(&input_mutex); + return retval; } EXPORT_SYMBOL(input_register_handler); +/** + * input_unregister_handler - unregisters an input handler + * @handler: handler to be unregistered + * + * This function disconnects a handler from its input devices and + * removes it from lists of known handlers. + */ void input_unregister_handler(struct input_handler *handler) { struct input_handle *handle, *next; + mutex_lock(&input_mutex); + list_for_each_entry_safe(handle, next, &handler->h_list, h_node) handler->disconnect(handle); WARN_ON(!list_empty(&handler->h_list)); @@ -1185,14 +1445,45 @@ void input_unregister_handler(struct input_handler *handler) input_table[handler->minor >> 5] = NULL; input_wakeup_procfs_readers(); + + mutex_unlock(&input_mutex); } EXPORT_SYMBOL(input_unregister_handler); +/** + * input_register_handle - register a new input handle + * @handle: handle to register + * + * This function puts a new input handle onto device's + * and handler's lists so that events can flow through + * it once it is opened using input_open_device(). + * + * This function is supposed to be called from handler's + * connect() method. + */ int input_register_handle(struct input_handle *handle) { struct input_handler *handler = handle->handler; + struct input_dev *dev = handle->dev; + int error; + + /* + * We take dev->mutex here to prevent race with + * input_release_device(). + */ + error = mutex_lock_interruptible(&dev->mutex); + if (error) + return error; + list_add_tail_rcu(&handle->d_node, &dev->h_list); + mutex_unlock(&dev->mutex); + synchronize_rcu(); - list_add_tail(&handle->d_node, &handle->dev->h_list); + /* + * Since we are supposed to be called from ->connect() + * which is mutually exclusive with ->disconnect() + * we can't be racing with input_unregister_handle() + * and so separate lock is not needed here. + */ list_add_tail(&handle->h_node, &handler->h_list); if (handler->start) @@ -1202,10 +1493,29 @@ int input_register_handle(struct input_handle *handle) } EXPORT_SYMBOL(input_register_handle); +/** + * input_unregister_handle - unregister an input handle + * @handle: handle to unregister + * + * This function removes input handle from device's + * and handler's lists. + * + * This function is supposed to be called from handler's + * disconnect() method. + */ void input_unregister_handle(struct input_handle *handle) { + struct input_dev *dev = handle->dev; + list_del_init(&handle->h_node); - list_del_init(&handle->d_node); + + /* + * Take dev->mutex to prevent race with input_release_device(). + */ + mutex_lock(&dev->mutex); + list_del_rcu(&handle->d_node); + mutex_unlock(&dev->mutex); + synchronize_rcu(); } EXPORT_SYMBOL(input_unregister_handle); diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index a9a0180bfd46..2b201f9aa024 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -43,6 +43,8 @@ struct joydev { struct input_handle handle; wait_queue_head_t wait; struct list_head client_list; + spinlock_t client_lock; /* protects client_list */ + struct mutex mutex; struct device dev; struct js_corr corr[ABS_MAX + 1]; @@ -61,31 +63,61 @@ struct joydev_client { int head; int tail; int startup; + spinlock_t buffer_lock; /* protects access to buffer, head and tail */ struct fasync_struct *fasync; struct joydev *joydev; struct list_head node; }; static struct joydev *joydev_table[JOYDEV_MINORS]; +static DEFINE_MUTEX(joydev_table_mutex); static int joydev_correct(int value, struct js_corr *corr) { switch (corr->type) { - case JS_CORR_NONE: - break; - case JS_CORR_BROKEN: - value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 : - ((corr->coef[3] * (value - corr->coef[1])) >> 14)) : - ((corr->coef[2] * (value - corr->coef[0])) >> 14); - break; - default: - return 0; + + case JS_CORR_NONE: + break; + + case JS_CORR_BROKEN: + value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 : + ((corr->coef[3] * (value - corr->coef[1])) >> 14)) : + ((corr->coef[2] * (value - corr->coef[0])) >> 14); + break; + + default: + return 0; } return value < -32767 ? -32767 : (value > 32767 ? 32767 : value); } -static void joydev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) +static void joydev_pass_event(struct joydev_client *client, + struct js_event *event) +{ + struct joydev *joydev = client->joydev; + + /* + * IRQs already disabled, just acquire the lock + */ + spin_lock(&client->buffer_lock); + + client->buffer[client->head] = *event; + + if (client->startup == joydev->nabs + joydev->nkey) { + client->head++; + client->head &= JOYDEV_BUFFER_SIZE - 1; + if (client->tail == client->head) + client->startup = 0; + } + + spin_unlock(&client->buffer_lock); + + kill_fasync(&client->fasync, SIGIO, POLL_IN); +} + +static void joydev_event(struct input_handle *handle, + unsigned int type, unsigned int code, int value) { struct joydev *joydev = handle->private; struct joydev_client *client; @@ -93,39 +125,34 @@ static void joydev_event(struct input_handle *handle, unsigned int type, unsigne switch (type) { - case EV_KEY: - if (code < BTN_MISC || value == 2) - return; - event.type = JS_EVENT_BUTTON; - event.number = joydev->keymap[code - BTN_MISC]; - event.value = value; - break; - - case EV_ABS: - event.type = JS_EVENT_AXIS; - event.number = joydev->absmap[code]; - event.value = joydev_correct(value, joydev->corr + event.number); - if (event.value == joydev->abs[event.number]) - return; - joydev->abs[event.number] = event.value; - break; + case EV_KEY: + if (code < BTN_MISC || value == 2) + return; + event.type = JS_EVENT_BUTTON; + event.number = joydev->keymap[code - BTN_MISC]; + event.value = value; + break; - default: + case EV_ABS: + event.type = JS_EVENT_AXIS; + event.number = joydev->absmap[code]; + event.value = joydev_correct(value, + &joydev->corr[event.number]); + if (event.value == joydev->abs[event.number]) return; + joydev->abs[event.number] = event.value; + break; + + default: + return; } event.time = jiffies_to_msecs(jiffies); - list_for_each_entry(client, &joydev->client_list, node) { - - memcpy(client->buffer + client->head, &event, sizeof(struct js_event)); - - if (client->startup == joydev->nabs + joydev->nkey) - if (client->tail == (client->head = (client->head + 1) & (JOYDEV_BUFFER_SIZE - 1))) - client->startup = 0; - - kill_fasync(&client->fasync, SIGIO, POLL_IN); - } + rcu_read_lock(); + list_for_each_entry_rcu(client, &joydev->client_list, node) + joydev_pass_event(client, &event); + rcu_read_unlock(); wake_up_interruptible(&joydev->wait); } @@ -144,23 +171,83 @@ static void joydev_free(struct device *dev) { struct joydev *joydev = container_of(dev, struct joydev, dev); - joydev_table[joydev->minor] = NULL; kfree(joydev); } +static void joydev_attach_client(struct joydev *joydev, + struct joydev_client *client) +{ + spin_lock(&joydev->client_lock); + list_add_tail_rcu(&client->node, &joydev->client_list); + spin_unlock(&joydev->client_lock); + synchronize_rcu(); +} + +static void joydev_detach_client(struct joydev *joydev, + struct joydev_client *client) +{ + spin_lock(&joydev->client_lock); + list_del_rcu(&client->node); + spin_unlock(&joydev->client_lock); + synchronize_rcu(); +} + +static int joydev_open_device(struct joydev *joydev) +{ + int retval; + + retval = mutex_lock_interruptible(&joydev->mutex); + if (retval) + return retval; + + if (!joydev->exist) + retval = -ENODEV; + else if (!joydev->open++) { + retval = input_open_device(&joydev->handle); + if (retval) + joydev->open--; + } + + mutex_unlock(&joydev->mutex); + return retval; +} + +static void joydev_close_device(struct joydev *joydev) +{ + mutex_lock(&joydev->mutex); + + if (joydev->exist && !--joydev->open) + input_close_device(&joydev->handle); + + mutex_unlock(&joydev->mutex); +} + +/* + * Wake up users waiting for IO so they can disconnect from + * dead device. + */ +static void joydev_hangup(struct joydev *joydev) +{ + struct joydev_client *client; + + spin_lock(&joydev->client_lock); + list_for_each_entry(client, &joydev->client_list, node) + kill_fasync(&client->fasync, SIGIO, POLL_HUP); + spin_unlock(&joydev->client_lock); + + wake_up_interruptible(&joydev->wait); +} + static int joydev_release(struct inode *inode, struct file *file) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; joydev_fasync(-1, file, 0); - - list_del(&client->node); + joydev_detach_client(joydev, client); kfree(client); - if (!--joydev->open && joydev->exist) - input_close_device(&joydev->handle); - + joydev_close_device(joydev); put_device(&joydev->dev); return 0; @@ -176,11 +263,16 @@ static int joydev_open(struct inode *inode, struct file *file) if (i >= JOYDEV_MINORS) return -ENODEV; + error = mutex_lock_interruptible(&joydev_table_mutex); + if (error) + return error; joydev = joydev_table[i]; - if (!joydev || !joydev->exist) - return -ENODEV; + if (joydev) + get_device(&joydev->dev); + mutex_unlock(&joydev_table_mutex); - get_device(&joydev->dev); + if (!joydev) + return -ENODEV; client = kzalloc(sizeof(struct joydev_client), GFP_KERNEL); if (!client) { @@ -188,37 +280,129 @@ static int joydev_open(struct inode *inode, struct file *file) goto err_put_joydev; } + spin_lock_init(&client->buffer_lock); client->joydev = joydev; - list_add_tail(&client->node, &joydev->client_list); + joydev_attach_client(joydev, client); - if (!joydev->open++ && joydev->exist) { - error = input_open_device(&joydev->handle); - if (error) - goto err_free_client; - } + error = joydev_open_device(joydev); + if (error) + goto err_free_client; file->private_data = client; return 0; err_free_client: - list_del(&client->node); + joydev_detach_client(joydev, client); kfree(client); err_put_joydev: put_device(&joydev->dev); return error; } -static ssize_t joydev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) +static int joydev_generate_startup_event(struct joydev_client *client, + struct input_dev *input, + struct js_event *event) { - return -EINVAL; + struct joydev *joydev = client->joydev; + int have_event; + + spin_lock_irq(&client->buffer_lock); + + have_event = client->startup < joydev->nabs + joydev->nkey; + + if (have_event) { + + event->time = jiffies_to_msecs(jiffies); + if (client->startup < joydev->nkey) { + event->type = JS_EVENT_BUTTON | JS_EVENT_INIT; + event->number = client->startup; + event->value = !!test_bit(joydev->keypam[event->number], + input->key); + } else { + event->type = JS_EVENT_AXIS | JS_EVENT_INIT; + event->number = client->startup - joydev->nkey; + event->value = joydev->abs[event->number]; + } + client->startup++; + } + + spin_unlock_irq(&client->buffer_lock); + + return have_event; +} + +static int joydev_fetch_next_event(struct joydev_client *client, + struct js_event *event) +{ + int have_event; + + spin_lock_irq(&client->buffer_lock); + + have_event = client->head != client->tail; + if (have_event) { + *event = client->buffer[client->tail++]; + client->tail &= JOYDEV_BUFFER_SIZE - 1; + } + + spin_unlock_irq(&client->buffer_lock); + + return have_event; +} + +/* + * Old joystick interface + */ +static ssize_t joydev_0x_read(struct joydev_client *client, + struct input_dev *input, + char __user *buf) +{ + struct joydev *joydev = client->joydev; + struct JS_DATA_TYPE data; + int i; + + spin_lock_irq(&input->event_lock); + + /* + * Get device state + */ + for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++) + data.buttons |= + test_bit(joydev->keypam[i], input->key) ? (1 << i) : 0; + data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x; + data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y; + + /* + * Reset reader's event queue + */ + spin_lock(&client->buffer_lock); + client->startup = 0; + client->tail = client->head; + spin_unlock(&client->buffer_lock); + + spin_unlock_irq(&input->event_lock); + + if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE))) + return -EFAULT; + + return sizeof(struct JS_DATA_TYPE); +} + +static inline int joydev_data_pending(struct joydev_client *client) +{ + struct joydev *joydev = client->joydev; + + return client->startup < joydev->nabs + joydev->nkey || + client->head != client->tail; } -static ssize_t joydev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +static ssize_t joydev_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; struct input_dev *input = joydev->handle.dev; - int retval = 0; + struct js_event event; + int retval; if (!joydev->exist) return -ENODEV; @@ -226,68 +410,35 @@ static ssize_t joydev_read(struct file *file, char __user *buf, size_t count, lo if (count < sizeof(struct js_event)) return -EINVAL; - if (count == sizeof(struct JS_DATA_TYPE)) { - - struct JS_DATA_TYPE data; - int i; - - for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++) - data.buttons |= test_bit(joydev->keypam[i], input->key) ? (1 << i) : 0; - data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x; - data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y; - - if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE))) - return -EFAULT; - - client->startup = 0; - client->tail = client->head; + if (count == sizeof(struct JS_DATA_TYPE)) + return joydev_0x_read(client, input, buf); - return sizeof(struct JS_DATA_TYPE); - } - - if (client->startup == joydev->nabs + joydev->nkey && - client->head == client->tail && (file->f_flags & O_NONBLOCK)) + if (!joydev_data_pending(client) && (file->f_flags & O_NONBLOCK)) return -EAGAIN; retval = wait_event_interruptible(joydev->wait, - !joydev->exist || - client->startup < joydev->nabs + joydev->nkey || - client->head != client->tail); + !joydev->exist || joydev_data_pending(client)); if (retval) return retval; if (!joydev->exist) return -ENODEV; - while (client->startup < joydev->nabs + joydev->nkey && retval + sizeof(struct js_event) <= count) { - - struct js_event event; - - event.time = jiffies_to_msecs(jiffies); - - if (client->startup < joydev->nkey) { - event.type = JS_EVENT_BUTTON | JS_EVENT_INIT; - event.number = client->startup; - event.value = !!test_bit(joydev->keypam[event.number], input->key); - } else { - event.type = JS_EVENT_AXIS | JS_EVENT_INIT; - event.number = client->startup - joydev->nkey; - event.value = joydev->abs[event.number]; - } + while (retval + sizeof(struct js_event) <= count && + joydev_generate_startup_event(client, input, &event)) { if (copy_to_user(buf + retval, &event, sizeof(struct js_event))) return -EFAULT; - client->startup++; retval += sizeof(struct js_event); } - while (client->head != client->tail && retval + sizeof(struct js_event) <= count) { + while (retval + sizeof(struct js_event) <= count && + joydev_fetch_next_event(client, &event)) { - if (copy_to_user(buf + retval, client->buffer + client->tail, sizeof(struct js_event))) + if (copy_to_user(buf + retval, &event, sizeof(struct js_event))) return -EFAULT; - client->tail = (client->tail + 1) & (JOYDEV_BUFFER_SIZE - 1); retval += sizeof(struct js_event); } @@ -301,126 +452,144 @@ static unsigned int joydev_poll(struct file *file, poll_table *wait) struct joydev *joydev = client->joydev; poll_wait(file, &joydev->wait, wait); - return ((client->head != client->tail || client->startup < joydev->nabs + joydev->nkey) ? - (POLLIN | POLLRDNORM) : 0) | (joydev->exist ? 0 : (POLLHUP | POLLERR)); + return (joydev_data_pending(client) ? (POLLIN | POLLRDNORM) : 0) | + (joydev->exist ? 0 : (POLLHUP | POLLERR)); } -static int joydev_ioctl_common(struct joydev *joydev, unsigned int cmd, void __user *argp) +static int joydev_ioctl_common(struct joydev *joydev, + unsigned int cmd, void __user *argp) { struct input_dev *dev = joydev->handle.dev; int i, j; switch (cmd) { - case JS_SET_CAL: - return copy_from_user(&joydev->glue.JS_CORR, argp, + case JS_SET_CAL: + return copy_from_user(&joydev->glue.JS_CORR, argp, sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0; - case JS_GET_CAL: - return copy_to_user(argp, &joydev->glue.JS_CORR, + case JS_GET_CAL: + return copy_to_user(argp, &joydev->glue.JS_CORR, sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0; - case JS_SET_TIMEOUT: - return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); + case JS_SET_TIMEOUT: + return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); - case JS_GET_TIMEOUT: - return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); + case JS_GET_TIMEOUT: + return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); - case JSIOCGVERSION: - return put_user(JS_VERSION, (__u32 __user *) argp); + case JSIOCGVERSION: + return put_user(JS_VERSION, (__u32 __user *) argp); - case JSIOCGAXES: - return put_user(joydev->nabs, (__u8 __user *) argp); + case JSIOCGAXES: + return put_user(joydev->nabs, (__u8 __user *) argp); - case JSIOCGBUTTONS: - return put_user(joydev->nkey, (__u8 __user *) argp); + case JSIOCGBUTTONS: + return put_user(joydev->nkey, (__u8 __user *) argp); - case JSIOCSCORR: - if (copy_from_user(joydev->corr, argp, - sizeof(joydev->corr[0]) * joydev->nabs)) - return -EFAULT; - for (i = 0; i < joydev->nabs; i++) { - j = joydev->abspam[i]; - joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i); - } - return 0; + case JSIOCSCORR: + if (copy_from_user(joydev->corr, argp, + sizeof(joydev->corr[0]) * joydev->nabs)) + return -EFAULT; - case JSIOCGCORR: - return copy_to_user(argp, joydev->corr, - sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0; + for (i = 0; i < joydev->nabs; i++) { + j = joydev->abspam[i]; + joydev->abs[i] = joydev_correct(dev->abs[j], + &joydev->corr[i]); + } + return 0; - case JSIOCSAXMAP: - if (copy_from_user(joydev->abspam, argp, sizeof(__u8) * (ABS_MAX + 1))) - return -EFAULT; - for (i = 0; i < joydev->nabs; i++) { - if (joydev->abspam[i] > ABS_MAX) - return -EINVAL; - joydev->absmap[joydev->abspam[i]] = i; - } - return 0; - - case JSIOCGAXMAP: - return copy_to_user(argp, joydev->abspam, - sizeof(__u8) * (ABS_MAX + 1)) ? -EFAULT : 0; - - case JSIOCSBTNMAP: - if (copy_from_user(joydev->keypam, argp, sizeof(__u16) * (KEY_MAX - BTN_MISC + 1))) + case JSIOCGCORR: + return copy_to_user(argp, joydev->corr, + sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0; + + case JSIOCSAXMAP: + if (copy_from_user(joydev->abspam, argp, + sizeof(__u8) * (ABS_MAX + 1))) + return -EFAULT; + + for (i = 0; i < joydev->nabs; i++) { + if (joydev->abspam[i] > ABS_MAX) + return -EINVAL; + joydev->absmap[joydev->abspam[i]] = i; + } + return 0; + + case JSIOCGAXMAP: + return copy_to_user(argp, joydev->abspam, + sizeof(__u8) * (ABS_MAX + 1)) ? -EFAULT : 0; + + case JSIOCSBTNMAP: + if (copy_from_user(joydev->keypam, argp, + sizeof(__u16) * (KEY_MAX - BTN_MISC + 1))) + return -EFAULT; + + for (i = 0; i < joydev->nkey; i++) { + if (joydev->keypam[i] > KEY_MAX || + joydev->keypam[i] < BTN_MISC) + return -EINVAL; + joydev->keymap[joydev->keypam[i] - BTN_MISC] = i; + } + + return 0; + + case JSIOCGBTNMAP: + return copy_to_user(argp, joydev->keypam, + sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0; + + default: + if ((cmd & ~IOCSIZE_MASK) == JSIOCGNAME(0)) { + int len; + if (!dev->name) + return 0; + len = strlen(dev->name) + 1; + if (len > _IOC_SIZE(cmd)) + len = _IOC_SIZE(cmd); + if (copy_to_user(argp, dev->name, len)) return -EFAULT; - for (i = 0; i < joydev->nkey; i++) { - if (joydev->keypam[i] > KEY_MAX || joydev->keypam[i] < BTN_MISC) - return -EINVAL; - joydev->keymap[joydev->keypam[i] - BTN_MISC] = i; - } - return 0; - - case JSIOCGBTNMAP: - return copy_to_user(argp, joydev->keypam, - sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0; - - default: - if ((cmd & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) == JSIOCGNAME(0)) { - int len; - if (!dev->name) - return 0; - len = strlen(dev->name) + 1; - if (len > _IOC_SIZE(cmd)) - len = _IOC_SIZE(cmd); - if (copy_to_user(argp, dev->name, len)) - return -EFAULT; - return len; - } + return len; + } } return -EINVAL; } #ifdef CONFIG_COMPAT -static long joydev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long joydev_compat_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; void __user *argp = (void __user *)arg; s32 tmp32; struct JS_DATA_SAVE_TYPE_32 ds32; - int err; + int retval; - if (!joydev->exist) - return -ENODEV; + retval = mutex_lock_interruptible(&joydev->mutex); + if (retval) + return retval; + + if (!joydev->exist) { + retval = -ENODEV; + goto out; + } + + switch (cmd) { - switch(cmd) { case JS_SET_TIMELIMIT: - err = get_user(tmp32, (s32 __user *) arg); - if (err == 0) + retval = get_user(tmp32, (s32 __user *) arg); + if (retval == 0) joydev->glue.JS_TIMELIMIT = tmp32; break; + case JS_GET_TIMELIMIT: tmp32 = joydev->glue.JS_TIMELIMIT; - err = put_user(tmp32, (s32 __user *) arg); + retval = put_user(tmp32, (s32 __user *) arg); break; case JS_SET_ALL: - err = copy_from_user(&ds32, argp, - sizeof(ds32)) ? -EFAULT : 0; - if (err == 0) { + retval = copy_from_user(&ds32, argp, + sizeof(ds32)) ? -EFAULT : 0; + if (retval == 0) { joydev->glue.JS_TIMEOUT = ds32.JS_TIMEOUT; joydev->glue.BUSY = ds32.BUSY; joydev->glue.JS_EXPIRETIME = ds32.JS_EXPIRETIME; @@ -438,55 +607,119 @@ static long joydev_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo ds32.JS_SAVE = joydev->glue.JS_SAVE; ds32.JS_CORR = joydev->glue.JS_CORR; - err = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0; + retval = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0; break; default: - err = joydev_ioctl_common(joydev, cmd, argp); + retval = joydev_ioctl_common(joydev, cmd, argp); + break; } - return err; + + out: + mutex_unlock(&joydev->mutex); + return retval; } #endif /* CONFIG_COMPAT */ -static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +static long joydev_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; void __user *argp = (void __user *)arg; + int retval; - if (!joydev->exist) - return -ENODEV; + retval = mutex_lock_interruptible(&joydev->mutex); + if (retval) + return retval; + + if (!joydev->exist) { + retval = -ENODEV; + goto out; + } + + switch (cmd) { + + case JS_SET_TIMELIMIT: + retval = get_user(joydev->glue.JS_TIMELIMIT, + (long __user *) arg); + break; + + case JS_GET_TIMELIMIT: + retval = put_user(joydev->glue.JS_TIMELIMIT, + (long __user *) arg); + break; + + case JS_SET_ALL: + retval = copy_from_user(&joydev->glue, argp, + sizeof(joydev->glue)) ? -EFAULT: 0; + break; + + case JS_GET_ALL: + retval = copy_to_user(argp, &joydev->glue, + sizeof(joydev->glue)) ? -EFAULT : 0; + break; - switch(cmd) { - case JS_SET_TIMELIMIT: - return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg); - case JS_GET_TIMELIMIT: - return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg); - case JS_SET_ALL: - return copy_from_user(&joydev->glue, argp, - sizeof(joydev->glue)) ? -EFAULT : 0; - case JS_GET_ALL: - return copy_to_user(argp, &joydev->glue, - sizeof(joydev->glue)) ? -EFAULT : 0; - default: - return joydev_ioctl_common(joydev, cmd, argp); + default: + retval = joydev_ioctl_common(joydev, cmd, argp); + break; } + out: + mutex_unlock(&joydev->mutex); + return retval; } static const struct file_operations joydev_fops = { - .owner = THIS_MODULE, - .read = joydev_read, - .write = joydev_write, - .poll = joydev_poll, - .open = joydev_open, - .release = joydev_release, - .ioctl = joydev_ioctl, + .owner = THIS_MODULE, + .read = joydev_read, + .poll = joydev_poll, + .open = joydev_open, + .release = joydev_release, + .unlocked_ioctl = joydev_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = joydev_compat_ioctl, + .compat_ioctl = joydev_compat_ioctl, #endif - .fasync = joydev_fasync, + .fasync = joydev_fasync, }; +static int joydev_install_chrdev(struct joydev *joydev) +{ + joydev_table[joydev->minor] = joydev; + return 0; +} + +static void joydev_remove_chrdev(struct joydev *joydev) +{ + mutex_lock(&joydev_table_mutex); + joydev_table[joydev->minor] = NULL; + mutex_unlock(&joydev_table_mutex); +} + +/* + * Mark device non-existant. This disables writes, ioctls and + * prevents new users from opening the device. Already posted + * blocking reads will stay, however new ones will fail. + */ +static void joydev_mark_dead(struct joydev *joydev) +{ + mutex_lock(&joydev->mutex); + joydev->exist = 0; + mutex_unlock(&joydev->mutex); +} + +static void joydev_cleanup(struct joydev *joydev) +{ + struct input_handle *handle = &joydev->handle; + + joydev_mark_dead(joydev); + joydev_hangup(joydev); + joydev_remove_chrdev(joydev); + + /* joydev is marked dead so noone else accesses joydev->open */ + if (joydev->open) + input_close_device(handle); +} + static int joydev_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { @@ -494,7 +727,10 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev, int i, j, t, minor; int error; - for (minor = 0; minor < JOYDEV_MINORS && joydev_table[minor]; minor++); + for (minor = 0; minor < JOYDEV_MINORS; minor++) + if (!joydev_table[minor]) + break; + if (minor == JOYDEV_MINORS) { printk(KERN_ERR "joydev: no more free joydev devices\n"); return -ENFILE; @@ -505,15 +741,19 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev, return -ENOMEM; INIT_LIST_HEAD(&joydev->client_list); + spin_lock_init(&joydev->client_lock); + mutex_init(&joydev->mutex); init_waitqueue_head(&joydev->wait); + snprintf(joydev->name, sizeof(joydev->name), "js%d", minor); + joydev->exist = 1; joydev->minor = minor; + joydev->exist = 1; joydev->handle.dev = dev; joydev->handle.name = joydev->name; joydev->handle.handler = handler; joydev->handle.private = joydev; - snprintf(joydev->name, sizeof(joydev->name), "js%d", minor); for (i = 0; i < ABS_MAX + 1; i++) if (test_bit(i, dev->absbit)) { @@ -545,67 +785,65 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev, } joydev->corr[i].type = JS_CORR_BROKEN; joydev->corr[i].prec = dev->absfuzz[j]; - joydev->corr[i].coef[0] = (dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j]; - joydev->corr[i].coef[1] = (dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j]; - if (!(t = ((dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j]))) - continue; - joydev->corr[i].coef[2] = (1 << 29) / t; - joydev->corr[i].coef[3] = (1 << 29) / t; - - joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i); + joydev->corr[i].coef[0] = + (dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j]; + joydev->corr[i].coef[1] = + (dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j]; + + t = (dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j]; + if (t) { + joydev->corr[i].coef[2] = (1 << 29) / t; + joydev->corr[i].coef[3] = (1 << 29) / t; + + joydev->abs[i] = joydev_correct(dev->abs[j], + joydev->corr + i); + } } - snprintf(joydev->dev.bus_id, sizeof(joydev->dev.bus_id), - "js%d", minor); + strlcpy(joydev->dev.bus_id, joydev->name, sizeof(joydev->dev.bus_id)); + joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor); joydev->dev.class = &input_class; joydev->dev.parent = &dev->dev; - joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor); joydev->dev.release = joydev_free; device_initialize(&joydev->dev); - joydev_table[minor] = joydev; - - error = device_add(&joydev->dev); + error = input_register_handle(&joydev->handle); if (error) goto err_free_joydev; - error = input_register_handle(&joydev->handle); + error = joydev_install_chrdev(joydev); if (error) - goto err_delete_joydev; + goto err_unregister_handle; + + error = device_add(&joydev->dev); + if (error) + goto err_cleanup_joydev; return 0; - err_delete_joydev: - device_del(&joydev->dev); + err_cleanup_joydev: + joydev_cleanup(joydev); + err_unregister_handle: + input_unregister_handle(&joydev->handle); err_free_joydev: put_device(&joydev->dev); return error; } - static void joydev_disconnect(struct input_handle *handle) { struct joydev *joydev = handle->private; - struct joydev_client *client; - input_unregister_handle(handle); device_del(&joydev->dev); - - joydev->exist = 0; - - if (joydev->open) { - input_close_device(handle); - list_for_each_entry(client, &joydev->client_list, node) - kill_fasync(&client->fasync, SIGIO, POLL_HUP); - wake_up_interruptible(&joydev->wait); - } - + joydev_cleanup(joydev); + input_unregister_handle(handle); put_device(&joydev->dev); } static const struct input_device_id joydev_blacklist[] = { { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_KEYBIT, .evbit = { BIT(EV_KEY) }, .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, }, /* Avoid itouchpads, touchscreens and tablets */ @@ -614,17 +852,20 @@ static const struct input_device_id joydev_blacklist[] = { static const struct input_device_id joydev_ids[] = { { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_ABS) }, .absbit = { BIT(ABS_X) }, }, { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_ABS) }, .absbit = { BIT(ABS_WHEEL) }, }, { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_ABS) }, .absbit = { BIT(ABS_THROTTLE) }, }, @@ -634,14 +875,14 @@ static const struct input_device_id joydev_ids[] = { MODULE_DEVICE_TABLE(input, joydev_ids); static struct input_handler joydev_handler = { - .event = joydev_event, - .connect = joydev_connect, - .disconnect = joydev_disconnect, - .fops = &joydev_fops, - .minor = JOYDEV_MINOR_BASE, - .name = "joydev", - .id_table = joydev_ids, - .blacklist = joydev_blacklist, + .event = joydev_event, + .connect = joydev_connect, + .disconnect = joydev_disconnect, + .fops = &joydev_fops, + .minor = JOYDEV_MINOR_BASE, + .name = "joydev", + .id_table = joydev_ids, + .blacklist = joydev_blacklist, }; static int __init joydev_init(void) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 28080395899c..623629a69b03 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -223,12 +223,16 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d struct input_dev *dev = xpad->dev; /* left stick */ - input_report_abs(dev, ABS_X, (__s16) (((__s16)data[13] << 8) | data[12])); - input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[15] << 8) | data[14])); + input_report_abs(dev, ABS_X, + (__s16) le16_to_cpup((__le16 *)(data + 12))); + input_report_abs(dev, ABS_Y, + (__s16) le16_to_cpup((__le16 *)(data + 14))); /* right stick */ - input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[17] << 8) | data[16])); - input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[19] << 8) | data[18])); + input_report_abs(dev, ABS_RX, + (__s16) le16_to_cpup((__le16 *)(data + 16))); + input_report_abs(dev, ABS_RY, + (__s16) le16_to_cpup((__le16 *)(data + 18))); /* triggers left/right */ input_report_abs(dev, ABS_Z, data[10]); @@ -236,8 +240,10 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d /* digital pad */ if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) { - input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); - input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); + input_report_abs(dev, ABS_HAT0X, + !!(data[2] & 0x08) - !!(data[2] & 0x04)); + input_report_abs(dev, ABS_HAT0Y, + !!(data[2] & 0x02) - !!(data[2] & 0x01)); } else /* xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS */ { input_report_key(dev, BTN_LEFT, data[2] & 0x04); input_report_key(dev, BTN_RIGHT, data[2] & 0x08); @@ -274,14 +280,17 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d * http://www.free60.org/wiki/Gamepad */ -static void xpad360_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) +static void xpad360_process_packet(struct usb_xpad *xpad, + u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; /* digital pad */ if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) { - input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); - input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); + input_report_abs(dev, ABS_HAT0X, + !!(data[2] & 0x08) - !!(data[2] & 0x04)); + input_report_abs(dev, ABS_HAT0Y, + !!(data[2] & 0x02) - !!(data[2] & 0x01)); } else if (xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (right, left, down, up) */ input_report_key(dev, BTN_LEFT, data[2] & 0x04); @@ -308,12 +317,16 @@ static void xpad360_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char input_report_key(dev, BTN_MODE, data[3] & 0x04); /* left stick */ - input_report_abs(dev, ABS_X, (__s16) (((__s16)data[7] << 8) | (__s16)data[6])); - input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[9] << 8) | (__s16)data[8])); + input_report_abs(dev, ABS_X, + (__s16) le16_to_cpup((__le16 *)(data + 6))); + input_report_abs(dev, ABS_Y, + (__s16) le16_to_cpup((__le16 *)(data + 8))); /* right stick */ - input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[11] << 8) | (__s16)data[10])); - input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[13] << 8) | (__s16)data[12])); + input_report_abs(dev, ABS_RX, + (__s16) le16_to_cpup((__le16 *)(data + 10))); + input_report_abs(dev, ABS_RY, + (__s16) le16_to_cpup((__le16 *)(data + 12))); /* triggers left/right */ input_report_abs(dev, ABS_Z, data[4]); @@ -335,10 +348,12 @@ static void xpad_irq_in(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - dbg("%s - urb shutting down with status: %d", __FUNCTION__, urb->status); + dbg("%s - urb shutting down with status: %d", + __FUNCTION__, urb->status); return; default: - dbg("%s - nonzero urb status received: %d", __FUNCTION__, urb->status); + dbg("%s - nonzero urb status received: %d", + __FUNCTION__, urb->status); goto exit; } @@ -367,10 +382,12 @@ static void xpad_irq_out(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - dbg("%s - urb shutting down with status: %d", __FUNCTION__, urb->status); + dbg("%s - urb shutting down with status: %d", + __FUNCTION__, urb->status); return; default: - dbg("%s - nonzero urb status received: %d", __FUNCTION__, urb->status); + dbg("%s - nonzero urb status received: %d", + __FUNCTION__, urb->status); goto exit; } @@ -378,7 +395,7 @@ exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) err("%s - usb_submit_urb failed with result %d", - __FUNCTION__, retval); + __FUNCTION__, retval); } static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad) @@ -595,7 +612,7 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct usb_device *udev = interface_to_usbdev (intf); + struct usb_device *udev = interface_to_usbdev(intf); struct usb_xpad *xpad; struct input_dev *input_dev; struct usb_endpoint_descriptor *ep_irq_in; diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index c97d5eb0075d..2316a018fae6 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -208,6 +208,27 @@ config KEYBOARD_HIL This driver implements support for HIL-keyboards attached to your machine, so normally you should say Y here. +config KEYBOARD_HP6XX + tristate "HP Jornada 6XX Keyboard support" + depends on SH_HP6XX + select INPUT_POLLDEV + help + This adds support for the onboard keyboard found on + HP Jornada 620/660/680/690. + + To compile this driver as a module, choose M here: the + module will be called jornada680_kbd. + +config KEYBOARD_HP7XX + tristate "HP Jornada 7XX Keyboard Driver" + depends on SA1100_JORNADA720_SSP && SA1100_SSP + help + Say Y here to add support for the HP Jornada 7xx (710/720/728) + onboard keyboard. + + To compile this driver as a module, choose M here: the + module will be called jornada720_kbd. + config KEYBOARD_OMAP tristate "TI OMAP keypad support" depends on (ARCH_OMAP1 || ARCH_OMAP2) @@ -253,4 +274,23 @@ config KEYBOARD_GPIO To compile this driver as a module, choose M here: the module will be called gpio-keys. +config KEYBOARD_MAPLE + tristate "Maple bus keyboard" + depends on SH_DREAMCAST && MAPLE + help + Say Y here if you have a Dreamcast console running Linux and have + a keyboard attached to its Maple bus. + + To compile this driver as a module, choose M here: the + module will be called maple_keyb. + +config KEYBOARD_BFIN + tristate "Blackfin BF54x keypad support" + depends on BF54x + help + Say Y here if you want to use the BF54x keypad. + + To compile this driver as a module, choose M here: the + module will be called bf54x-keys. + endif diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 28d211b87b14..e97455fdcc83 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -21,4 +21,7 @@ obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keyboard.o obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o obj-$(CONFIG_KEYBOARD_GPIO) += gpio_keys.o - +obj-$(CONFIG_KEYBOARD_HP6XX) += jornada680_kbd.o +obj-$(CONFIG_KEYBOARD_HP7XX) += jornada720_kbd.o +obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o +obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c index f948d3a14a93..a1800151b6ce 100644 --- a/drivers/input/keyboard/atakbd.c +++ b/drivers/input/keyboard/atakbd.c @@ -217,7 +217,7 @@ static void atakbd_interrupt(unsigned char scancode, char down) static int __init atakbd_init(void) { - int i; + int i, error; if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ST_MFP)) return -EIO; @@ -247,9 +247,10 @@ static int __init atakbd_init(void) } /* error check */ - if (input_register_device(atakbd_dev)) { + error = input_register_device(atakbd_dev); + if (error) { input_free_device(atakbd_dev); - return -ENOMEM; + return error; } atari_input_keyboard_interrupt_hook = atakbd_interrupt; diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c new file mode 100644 index 000000000000..a67b29b089ef --- /dev/null +++ b/drivers/input/keyboard/bf54x-keys.c @@ -0,0 +1,382 @@ +/* + * File: drivers/input/keyboard/bf54x-keys.c + * Based on: + * Author: Michael Hennerich <hennerich@blackfin.uclinux.org> + * + * Created: + * Description: keypad driver for Analog Devices Blackfin BF54x Processors + * + * + * Modified: + * Copyright 2007 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <linux/module.h> +#include <linux/version.h> + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/sched.h> +#include <linux/pm.h> +#include <linux/sysctl.h> +#include <linux/proc_fs.h> +#include <linux/delay.h> +#include <linux/platform_device.h> +#include <linux/input.h> +#include <linux/irq.h> + +#include <asm/portmux.h> +#include <asm/mach/bf54x_keys.h> + +#define DRV_NAME "bf54x-keys" +#define TIME_SCALE 100 /* 100 ns */ +#define MAX_MULT (0xFF * TIME_SCALE) +#define MAX_RC 8 /* Max Row/Col */ + +static const u16 per_rows[] = { + P_KEY_ROW7, + P_KEY_ROW6, + P_KEY_ROW5, + P_KEY_ROW4, + P_KEY_ROW3, + P_KEY_ROW2, + P_KEY_ROW1, + P_KEY_ROW0, + 0 +}; + +static const u16 per_cols[] = { + P_KEY_COL7, + P_KEY_COL6, + P_KEY_COL5, + P_KEY_COL4, + P_KEY_COL3, + P_KEY_COL2, + P_KEY_COL1, + P_KEY_COL0, + 0 +}; + +struct bf54x_kpad { + struct input_dev *input; + int irq; + unsigned short lastkey; + unsigned short *keycode; + struct timer_list timer; + unsigned int keyup_test_jiffies; +}; + +static inline int bfin_kpad_find_key(struct bf54x_kpad *bf54x_kpad, + struct input_dev *input, u16 keyident) +{ + u16 i; + + for (i = 0; i < input->keycodemax; i++) + if (bf54x_kpad->keycode[i + input->keycodemax] == keyident) + return bf54x_kpad->keycode[i]; + return -1; +} + +static inline void bfin_keycodecpy(unsigned short *keycode, + const unsigned int *pdata_kc, + unsigned short keymapsize) +{ + unsigned int i; + + for (i = 0; i < keymapsize; i++) { + keycode[i] = pdata_kc[i] & 0xffff; + keycode[i + keymapsize] = pdata_kc[i] >> 16; + } +} + +static inline u16 bfin_kpad_get_prescale(u32 timescale) +{ + u32 sclk = get_sclk(); + + return ((((sclk / 1000) * timescale) / 1024) - 1); +} + +static inline u16 bfin_kpad_get_keypressed(struct bf54x_kpad *bf54x_kpad) +{ + return (bfin_read_KPAD_STAT() & KPAD_PRESSED); +} + +static inline void bfin_kpad_clear_irq(void) +{ + bfin_write_KPAD_STAT(0xFFFF); + bfin_write_KPAD_ROWCOL(0xFFFF); +} + +static void bfin_kpad_timer(unsigned long data) +{ + struct platform_device *pdev = (struct platform_device *) data; + struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev); + + if (bfin_kpad_get_keypressed(bf54x_kpad)) { + /* Try again later */ + mod_timer(&bf54x_kpad->timer, + jiffies + bf54x_kpad->keyup_test_jiffies); + return; + } + + input_report_key(bf54x_kpad->input, bf54x_kpad->lastkey, 0); + input_sync(bf54x_kpad->input); + + /* Clear IRQ Status */ + + bfin_kpad_clear_irq(); + enable_irq(bf54x_kpad->irq); +} + +static irqreturn_t bfin_kpad_isr(int irq, void *dev_id) +{ + struct platform_device *pdev = dev_id; + struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev); + struct input_dev *input = bf54x_kpad->input; + int key; + u16 rowcol = bfin_read_KPAD_ROWCOL(); + + key = bfin_kpad_find_key(bf54x_kpad, input, rowcol); + + input_report_key(input, key, 1); + input_sync(input); + + if (bfin_kpad_get_keypressed(bf54x_kpad)) { + disable_irq(bf54x_kpad->irq); + bf54x_kpad->lastkey = key; + mod_timer(&bf54x_kpad->timer, + jiffies + bf54x_kpad->keyup_test_jiffies); + } else { + input_report_key(input, key, 0); + input_sync(input); + + bfin_kpad_clear_irq(); + } + + return IRQ_HANDLED; +} + +static int __devinit bfin_kpad_probe(struct platform_device *pdev) +{ + struct bf54x_kpad *bf54x_kpad; + struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data; + struct input_dev *input; + int i, error; + + if (!pdata->rows || !pdata->cols || !pdata->keymap) { + printk(KERN_ERR DRV_NAME + ": No rows, cols or keymap from pdata\n"); + return -EINVAL; + } + + if (!pdata->keymapsize || + pdata->keymapsize > (pdata->rows * pdata->cols)) { + printk(KERN_ERR DRV_NAME ": Invalid keymapsize\n"); + return -EINVAL; + } + + bf54x_kpad = kzalloc(sizeof(struct bf54x_kpad), GFP_KERNEL); + if (!bf54x_kpad) + return -ENOMEM; + + platform_set_drvdata(pdev, bf54x_kpad); + + /* Allocate memory for keymap followed by private LUT */ + bf54x_kpad->keycode = kmalloc(pdata->keymapsize * + sizeof(unsigned short) * 2, GFP_KERNEL); + if (!bf54x_kpad->keycode) { + error = -ENOMEM; + goto out; + } + + if (!pdata->debounce_time || !pdata->debounce_time > MAX_MULT || + !pdata->coldrive_time || !pdata->coldrive_time > MAX_MULT) { + printk(KERN_ERR DRV_NAME + ": Invalid Debounce/Columdrive Time from pdata\n"); + bfin_write_KPAD_MSEL(0xFF0); /* Default MSEL */ + } else { + bfin_write_KPAD_MSEL( + ((pdata->debounce_time / TIME_SCALE) + & DBON_SCALE) | + (((pdata->coldrive_time / TIME_SCALE) << 8) + & COLDRV_SCALE)); + + } + + if (!pdata->keyup_test_interval) + bf54x_kpad->keyup_test_jiffies = msecs_to_jiffies(50); + else + bf54x_kpad->keyup_test_jiffies = + msecs_to_jiffies(pdata->keyup_test_interval); + + if (peripheral_request_list((u16 *)&per_rows[MAX_RC - pdata->rows], + DRV_NAME)) { + printk(KERN_ERR DRV_NAME + ": Requesting Peripherals failed\n"); + error = -EFAULT; + goto out0; + } + + if (peripheral_request_list((u16 *)&per_cols[MAX_RC - pdata->cols], + DRV_NAME)) { + printk(KERN_ERR DRV_NAME + ": Requesting Peripherals failed\n"); + error = -EFAULT; + goto out1; + } + + bf54x_kpad->irq = platform_get_irq(pdev, 0); + if (bf54x_kpad->irq < 0) { + error = -ENODEV; + goto out2; + } + + error = request_irq(bf54x_kpad->irq, bfin_kpad_isr, + IRQF_SAMPLE_RANDOM, DRV_NAME, pdev); + if (error) { + printk(KERN_ERR DRV_NAME + ": unable to claim irq %d; error %d\n", + bf54x_kpad->irq, error); + error = -EBUSY; + goto out2; + } + + input = input_allocate_device(); + if (!input) { + error = -ENOMEM; + goto out3; + } + + bf54x_kpad->input = input; + + input->name = pdev->name; + input->phys = "bf54x-keys/input0"; + input->dev.parent = &pdev->dev; + + input_set_drvdata(input, bf54x_kpad); + + input->id.bustype = BUS_HOST; + input->id.vendor = 0x0001; + input->id.product = 0x0001; + input->id.version = 0x0100; + + input->keycodesize = sizeof(unsigned short); + input->keycodemax = pdata->keymapsize; + input->keycode = bf54x_kpad->keycode; + + bfin_keycodecpy(bf54x_kpad->keycode, pdata->keymap, pdata->keymapsize); + + /* setup input device */ + __set_bit(EV_KEY, input->evbit); + + if (pdata->repeat) + __set_bit(EV_REP, input->evbit); + + for (i = 0; i < input->keycodemax; i++) + __set_bit(bf54x_kpad->keycode[i] & KEY_MAX, input->keybit); + __clear_bit(KEY_RESERVED, input->keybit); + + error = input_register_device(input); + if (error) { + printk(KERN_ERR DRV_NAME + ": Unable to register input device (%d)\n", error); + goto out4; + } + + /* Init Keypad Key Up/Release test timer */ + + setup_timer(&bf54x_kpad->timer, bfin_kpad_timer, (unsigned long) pdev); + + bfin_write_KPAD_PRESCALE(bfin_kpad_get_prescale(TIME_SCALE)); + + bfin_write_KPAD_CTL((((pdata->cols - 1) << 13) & KPAD_COLEN) | + (((pdata->rows - 1) << 10) & KPAD_ROWEN) | + (2 & KPAD_IRQMODE)); + + bfin_write_KPAD_CTL(bfin_read_KPAD_CTL() | KPAD_EN); + + printk(KERN_ERR DRV_NAME + ": Blackfin BF54x Keypad registered IRQ %d\n", bf54x_kpad->irq); + + return 0; + +out4: + input_free_device(input); +out3: + free_irq(bf54x_kpad->irq, pdev); +out2: + peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]); +out1: + peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]); +out0: + kfree(bf54x_kpad->keycode); +out: + kfree(bf54x_kpad); + platform_set_drvdata(pdev, NULL); + + return error; +} + +static int __devexit bfin_kpad_remove(struct platform_device *pdev) +{ + struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data; + struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev); + + del_timer_sync(&bf54x_kpad->timer); + free_irq(bf54x_kpad->irq, pdev); + + input_unregister_device(bf54x_kpad->input); + + peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]); + peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]); + + kfree(bf54x_kpad->keycode); + kfree(bf54x_kpad); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +struct platform_driver bfin_kpad_device_driver = { + .probe = bfin_kpad_probe, + .remove = __devexit_p(bfin_kpad_remove), + .driver = { + .name = DRV_NAME, + } +}; + +static int __init bfin_kpad_init(void) +{ + return platform_driver_register(&bfin_kpad_device_driver); +} + +static void __exit bfin_kpad_exit(void) +{ + platform_driver_unregister(&bfin_kpad_device_driver); +} + +module_init(bfin_kpad_init); +module_exit(bfin_kpad_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); +MODULE_DESCRIPTION("Keypad driver for BF54x Processors"); diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index f0b22b8b2769..e2a3293bc67e 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -54,6 +54,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; struct input_dev *input; int i, error; + int wakeup = 0; input = input_allocate_device(); if (!input) @@ -77,31 +78,51 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) int irq = gpio_to_irq(button->gpio); unsigned int type = button->type ?: EV_KEY; - set_irq_type(irq, IRQ_TYPE_EDGE_BOTH); - error = request_irq(irq, gpio_keys_isr, IRQF_SAMPLE_RANDOM, - button->desc ? button->desc : "gpio_keys", - pdev); + if (irq < 0) { + error = irq; + printk(KERN_ERR + "gpio-keys: " + "Unable to get irq number for GPIO %d," + "error %d\n", + button->gpio, error); + goto fail; + } + + error = request_irq(irq, gpio_keys_isr, + IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_RISING | + IRQF_TRIGGER_FALLING, + button->desc ? button->desc : "gpio_keys", + pdev); if (error) { - printk(KERN_ERR "gpio-keys: unable to claim irq %d; error %d\n", + printk(KERN_ERR + "gpio-keys: Unable to claim irq %d; error %d\n", irq, error); goto fail; } + if (button->wakeup) + wakeup = 1; + input_set_capability(input, type, button->code); } error = input_register_device(input); if (error) { - printk(KERN_ERR "Unable to register gpio-keys input device\n"); + printk(KERN_ERR + "gpio-keys: Unable to register input device, " + "error: %d\n", error); goto fail; } + device_init_wakeup(&pdev->dev, wakeup); + return 0; fail: - for (i = i - 1; i >= 0; i--) + while (--i >= 0) free_irq(gpio_to_irq(pdata->buttons[i].gpio), pdev); + platform_set_drvdata(pdev, NULL); input_free_device(input); return error; @@ -113,6 +134,8 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) struct input_dev *input = platform_get_drvdata(pdev); int i; + device_init_wakeup(&pdev->dev, 0); + for (i = 0; i < pdata->nbuttons; i++) { int irq = gpio_to_irq(pdata->buttons[i].gpio); free_irq(irq, pdev); @@ -123,9 +146,53 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) return 0; } + +#ifdef CONFIG_PM +static int gpio_keys_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; + int i; + + if (device_may_wakeup(&pdev->dev)) { + for (i = 0; i < pdata->nbuttons; i++) { + struct gpio_keys_button *button = &pdata->buttons[i]; + if (button->wakeup) { + int irq = gpio_to_irq(button->gpio); + enable_irq_wake(irq); + } + } + } + + return 0; +} + +static int gpio_keys_resume(struct platform_device *pdev) +{ + struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; + int i; + + if (device_may_wakeup(&pdev->dev)) { + for (i = 0; i < pdata->nbuttons; i++) { + struct gpio_keys_button *button = &pdata->buttons[i]; + if (button->wakeup) { + int irq = gpio_to_irq(button->gpio); + disable_irq_wake(irq); + } + } + } + + return 0; +} +#else +#define gpio_keys_suspend NULL +#define gpio_keys_resume NULL +#endif + struct platform_driver gpio_keys_device_driver = { .probe = gpio_keys_probe, .remove = __devexit_p(gpio_keys_remove), + .suspend = gpio_keys_suspend, + .resume = gpio_keys_resume, .driver = { .name = "gpio-keys", } diff --git a/drivers/input/keyboard/jornada680_kbd.c b/drivers/input/keyboard/jornada680_kbd.c new file mode 100644 index 000000000000..bec1cf483723 --- /dev/null +++ b/drivers/input/keyboard/jornada680_kbd.c @@ -0,0 +1,277 @@ +/* + * drivers/input/keyboard/jornada680_kbd.c + * + * HP Jornada 620/660/680/690 scan keyboard platform driver + * Copyright (C) 2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com> + * + * Based on hp680_keyb.c + * Copyright (C) 2006 Paul Mundt + * Copyright (C) 2005 Andriy Skulysh + * Split from drivers/input/keyboard/hp600_keyb.c + * Copyright (C) 2000 Yaegashi Takeshi (hp6xx kbd scan routine and translation table) + * Copyright (C) 2000 Niibe Yutaka (HP620 Keyb translation table) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/input.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/input-polldev.h> +#include <linux/jiffies.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> + +#include <asm/delay.h> +#include <asm/io.h> + +#define PCCR 0xa4000104 +#define PDCR 0xa4000106 +#define PECR 0xa4000108 +#define PFCR 0xa400010a +#define PCDR 0xa4000124 +#define PDDR 0xa4000126 +#define PEDR 0xa4000128 +#define PFDR 0xa400012a +#define PGDR 0xa400012c +#define PHDR 0xa400012e +#define PJDR 0xa4000130 +#define PKDR 0xa4000132 +#define PLDR 0xa4000134 + +static const unsigned short jornada_scancodes[] = { +/* PTD1 */ KEY_CAPSLOCK, KEY_MACRO, KEY_LEFTCTRL, 0, KEY_ESC, 0, 0, 0, /* 1 -> 8 */ + KEY_F1, KEY_F2, KEY_F3, KEY_F8, KEY_F7, KEY_F2, KEY_F4, KEY_F5, /* 9 -> 16 */ +/* PTD5 */ KEY_SLASH, KEY_APOSTROPHE, KEY_ENTER, 0, KEY_Z, 0, 0, 0, /* 17 -> 24 */ + KEY_X, KEY_C, KEY_V, KEY_DOT, KEY_COMMA, KEY_M, KEY_B, KEY_N, /* 25 -> 32 */ +/* PTD7 */ KEY_KP2, KEY_KP6, 0, 0, 0, 0, 0, 0, /* 33 -> 40 */ + 0, 0, 0, KEY_KP4, 0, 0, KEY_LEFTALT, KEY_HANJA, /* 41 -> 48 */ +/* PTE0 */ 0, 0, 0, 0, KEY_FINANCE, 0, 0, 0, /* 49 -> 56 */ + KEY_LEFTCTRL, 0, KEY_SPACE, KEY_KPDOT, KEY_VOLUMEUP, 249, 0, 0, /* 57 -> 64 */ +/* PTE1 */ KEY_SEMICOLON, KEY_RIGHTBRACE, KEY_BACKSLASH, 0, KEY_A, 0, 0, 0,/* 65 -> 72 */ + KEY_S, KEY_D, KEY_F, KEY_L, KEY_K, KEY_J, KEY_G, KEY_H, /* 73 -> 80 */ +/* PTE3 */ KEY_KP8, KEY_LEFTMETA, KEY_RIGHTSHIFT, 0, KEY_TAB, 0, 0,0, /* 81 -> 88 */ + 0, KEY_LEFTSHIFT, 0, 0, 0, 0, 0, 0, /* 89 -> 96 */ +/* PTE6 */ KEY_P, KEY_LEFTBRACE, KEY_BACKSPACE, 0, KEY_Q, 0, 0, 0, /* 97 -> 104 */ + KEY_W, KEY_E, KEY_R, KEY_O, KEY_I, KEY_U, KEY_T, KEY_R, /* 105 -> 112 */ +/* PTE7 */ KEY_0, KEY_MINUS, KEY_EQUAL, 0, KEY_1, 0, 0, 0, /* 113 -> 120 */ + KEY_2, KEY_3, KEY_4, KEY_9, KEY_8, KEY_7, KEY_5, KEY_6, /* 121 -> 128 */ +/* **** */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +}; + +#define JORNADA_SCAN_SIZE 18 + +struct jornadakbd { + struct input_polled_dev *poll_dev; + unsigned short keymap[ARRAY_SIZE(jornada_scancodes)]; + unsigned char length; + unsigned char old_scan[JORNADA_SCAN_SIZE]; + unsigned char new_scan[JORNADA_SCAN_SIZE]; +}; + +static void jornada_parse_kbd(struct jornadakbd *jornadakbd) +{ + struct input_dev *input_dev = jornadakbd->poll_dev->input; + unsigned short *keymap = jornadakbd->keymap; + unsigned int sync_me = 0; + unsigned int i, j; + + for (i = 0; i < JORNADA_SCAN_SIZE; i++) { + unsigned char new = jornadakbd->new_scan[i]; + unsigned char old = jornadakbd->old_scan[i]; + unsigned int xor = new ^ old; + + if (xor == 0) + continue; + + for (j = 0; j < 8; j++) { + unsigned int bit = 1 << j; + if (xor & bit) { + unsigned int scancode = (i << 3) + j; + input_event(input_dev, + EV_MSC, MSC_SCAN, scancode); + input_report_key(input_dev, + keymap[scancode], + !(new & bit)); + sync_me = 1; + } + } + } + + if (sync_me) + input_sync(input_dev); +} + +static void jornada_scan_keyb(unsigned char *s) +{ + int i; + unsigned short ec_static, dc_static; /* = UINT16_t */ + unsigned char matrix_switch[] = { + 0xfd, 0xff, /* PTD1 PD(1) */ + 0xdf, 0xff, /* PTD5 PD(5) */ + 0x7f, 0xff, /* PTD7 PD(7) */ + 0xff, 0xfe, /* PTE0 PE(0) */ + 0xff, 0xfd, /* PTE1 PE(1) */ + 0xff, 0xf7, /* PTE3 PE(3) */ + 0xff, 0xbf, /* PTE6 PE(6) */ + 0xff, 0x7f, /* PTE7 PE(7) */ + }, *t = matrix_switch; + /* PD(x) : + 1. 0xcc0c & (1~(1 << (2*(x)+1))))) + 2. (0xf0cf & 0xfffff) */ + /* PE(x) : + 1. 0xcc0c & 0xffff + 2. 0xf0cf & (1~(1 << (2*(x)+1))))) */ + unsigned short matrix_PDE[] = { + 0xcc04, 0xf0cf, /* PD(1) */ + 0xc40c, 0xf0cf, /* PD(5) */ + 0x4c0c, 0xf0cf, /* PD(7) */ + 0xcc0c, 0xf0cd, /* PE(0) */ + 0xcc0c, 0xf0c7, /* PE(1) */ + 0xcc0c, 0xf04f, /* PE(3) */ + 0xcc0c, 0xd0cf, /* PE(6) */ + 0xcc0c, 0x70cf, /* PE(7) */ + }, *y = matrix_PDE; + + /* Save these control reg bits */ + dc_static = (ctrl_inw(PDCR) & (~0xcc0c)); + ec_static = (ctrl_inw(PECR) & (~0xf0cf)); + + for (i = 0; i < 8; i++) { + /* disable output for all but the one we want to scan */ + ctrl_outw((dc_static | *y++), PDCR); + ctrl_outw((ec_static | *y++), PECR); + udelay(5); + + /* Get scanline row */ + ctrl_outb(*t++, PDDR); + ctrl_outb(*t++, PEDR); + udelay(50); + + /* Read data */ + *s++ = ctrl_inb(PCDR); + *s++ = ctrl_inb(PFDR); + } + /* Scan no lines */ + ctrl_outb(0xff, PDDR); + ctrl_outb(0xff, PEDR); + + /* Enable all scanlines */ + ctrl_outw((dc_static | (0x5555 & 0xcc0c)),PDCR); + ctrl_outw((ec_static | (0x5555 & 0xf0cf)),PECR); + + /* Ignore extra keys and events */ + *s++ = ctrl_inb(PGDR); + *s++ = ctrl_inb(PHDR); +} + +static void jornadakbd680_poll(struct input_polled_dev *dev) +{ + struct jornadakbd *jornadakbd = dev->private; + + jornada_scan_keyb(jornadakbd->new_scan); + jornada_parse_kbd(jornadakbd); + memcpy(jornadakbd->old_scan, jornadakbd->new_scan, JORNADA_SCAN_SIZE); +} + +static int __devinit jornada680kbd_probe(struct platform_device *pdev) +{ + struct jornadakbd *jornadakbd; + struct input_polled_dev *poll_dev; + struct input_dev *input_dev; + int i, error; + + jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL); + if (!jornadakbd) + return -ENOMEM; + + poll_dev = input_allocate_polled_device(); + if (!poll_dev) { + error = -ENOMEM; + goto failed; + } + + platform_set_drvdata(pdev, jornadakbd); + + jornadakbd->poll_dev = poll_dev; + + memcpy(jornadakbd->keymap, jornada_scancodes, + sizeof(jornadakbd->keymap)); + + poll_dev->private = jornadakbd; + poll_dev->poll = jornadakbd680_poll; + poll_dev->poll_interval = 50; /* msec */ + + input_dev = poll_dev->input; + input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); + input_dev->name = "HP Jornada 680 keyboard"; + input_dev->phys = "jornadakbd/input0"; + input_dev->keycode = jornadakbd->keymap; + input_dev->keycodesize = sizeof(unsigned short); + input_dev->keycodemax = ARRAY_SIZE(jornada_scancodes); + input_dev->dev.parent = &pdev->dev; + input_dev->id.bustype = BUS_HOST; + + for (i = 0; i < 128; i++) + if (jornadakbd->keymap[i]) + __set_bit(jornadakbd->keymap[i], input_dev->keybit); + __clear_bit(KEY_RESERVED, input_dev->keybit); + + input_set_capability(input_dev, EV_MSC, MSC_SCAN); + + error = input_register_polled_device(jornadakbd->poll_dev); + if (error) + goto failed; + + return 0; + + failed: + printk(KERN_ERR "Jornadakbd: failed to register driver, error: %d\n", + error); + platform_set_drvdata(pdev, NULL); + input_free_polled_device(poll_dev); + kfree(jornadakbd); + return error; + +} + +static int __devexit jornada680kbd_remove(struct platform_device *pdev) +{ + struct jornadakbd *jornadakbd = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + input_unregister_polled_device(jornadakbd->poll_dev); + input_free_polled_device(jornadakbd->poll_dev); + kfree(jornadakbd); + + return 0; +} + +static struct platform_driver jornada680kbd_driver = { + .driver = { + .name = "jornada680_kbd", + }, + .probe = jornada680kbd_probe, + .remove = __devexit_p(jornada680kbd_remove), +}; + +static int __init jornada680kbd_init(void) +{ + return platform_driver_register(&jornada680kbd_driver); +} + +static void __exit jornada680kbd_exit(void) +{ + platform_driver_unregister(&jornada680kbd_driver); +} + +module_init(jornada680kbd_init); +module_exit(jornada680kbd_exit); + +MODULE_AUTHOR("Kristoffer Ericson <kristoffer.ericson@gmail.com>"); +MODULE_DESCRIPTION("HP Jornada 620/660/680/690 Keyboard Driver"); +MODULE_LICENSE("GPLv2"); diff --git a/drivers/input/keyboard/jornada720_kbd.c b/drivers/input/keyboard/jornada720_kbd.c new file mode 100644 index 000000000000..e6696b3c9416 --- /dev/null +++ b/drivers/input/keyboard/jornada720_kbd.c @@ -0,0 +1,185 @@ +/* + * drivers/input/keyboard/jornada720_kbd.c + * + * HP Jornada 720 keyboard platform driver + * + * Copyright (C) 2006/2007 Kristoffer Ericson <Kristoffer.Ericson@Gmail.com> + * + * Copyright (C) 2006 jornada 720 kbd driver by + Filip Zyzniewsk <Filip.Zyzniewski@tefnet.plX + * based on (C) 2004 jornada 720 kbd driver by + Alex Lange <chicken@handhelds.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/input.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include <asm/arch/jornada720.h> +#include <asm/hardware.h> + +MODULE_AUTHOR("Kristoffer Ericson <Kristoffer.Ericson@gmail.com>"); +MODULE_DESCRIPTION("HP Jornada 710/720/728 keyboard driver"); +MODULE_LICENSE("GPLv2"); + +static unsigned short jornada_std_keymap[128] = { /* ROW */ + 0, KEY_ESC, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, /* #1 */ + KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE, /* -> */ + 0, KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, /* #2 */ + KEY_0, KEY_MINUS, KEY_EQUAL,0, 0, 0, /* -> */ + 0, KEY_Q, KEY_W, KEY_E, KEY_R, KEY_T, KEY_Y, KEY_U, KEY_I, KEY_O, /* #3 */ + KEY_P, KEY_BACKSLASH, KEY_BACKSPACE, 0, 0, 0, /* -> */ + 0, KEY_A, KEY_S, KEY_D, KEY_F, KEY_G, KEY_H, KEY_J, KEY_K, KEY_L, /* #4 */ + KEY_SEMICOLON, KEY_LEFTBRACE, KEY_RIGHTBRACE, 0, 0, 0, /* -> */ + 0, KEY_Z, KEY_X, KEY_C, KEY_V, KEY_B, KEY_N, KEY_M, KEY_COMMA, /* #5 */ + KEY_DOT, KEY_KPMINUS, KEY_APOSTROPHE, KEY_ENTER, 0, 0,0, /* -> */ + 0, KEY_TAB, 0, KEY_LEFTSHIFT, 0, KEY_APOSTROPHE, 0, 0, 0, 0, /* #6 */ + KEY_UP, 0, KEY_RIGHTSHIFT, 0, 0, 0,0, 0, 0, 0, 0, KEY_LEFTALT, KEY_GRAVE, /* -> */ + 0, 0, KEY_LEFT, KEY_DOWN, KEY_RIGHT, 0, 0, 0, 0,0, KEY_KPASTERISK, /* -> */ + KEY_LEFTCTRL, 0, KEY_SPACE, 0, 0, 0, KEY_SLASH, KEY_DELETE, 0, 0, /* -> */ + 0, 0, 0, KEY_POWER, /* -> */ +}; + +struct jornadakbd { + unsigned short keymap[ARRAY_SIZE(jornada_std_keymap)]; + struct input_dev *input; +}; + +static irqreturn_t jornada720_kbd_interrupt(int irq, void *dev_id) +{ + struct platform_device *pdev = dev_id; + struct jornadakbd *jornadakbd = platform_get_drvdata(pdev); + struct input_dev *input = jornadakbd->input; + u8 count, kbd_data, scan_code; + + /* startup ssp with spinlock */ + jornada_ssp_start(); + + if (jornada_ssp_inout(GETSCANKEYCODE) != TXDUMMY) { + printk(KERN_DEBUG + "jornada720_kbd: " + "GetKeycode command failed with ETIMEDOUT, " + "flushed bus\n"); + } else { + /* How many keycodes are waiting for us? */ + count = jornada_ssp_byte(TXDUMMY); + + /* Lets drag them out one at a time */ + while (count--) { + /* Exchange TxDummy for location (keymap[kbddata]) */ + kbd_data = jornada_ssp_byte(TXDUMMY); + scan_code = kbd_data & 0x7f; + + input_event(input, EV_MSC, MSC_SCAN, scan_code); + input_report_key(input, jornadakbd->keymap[scan_code], + !(kbd_data & 0x80)); + input_sync(input); + } + } + + /* release spinlock and turn off ssp */ + jornada_ssp_end(); + + return IRQ_HANDLED; +}; + +static int __devinit jornada720_kbd_probe(struct platform_device *pdev) +{ + struct jornadakbd *jornadakbd; + struct input_dev *input_dev; + int i, err; + + jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!jornadakbd || !input_dev) { + err = -ENOMEM; + goto fail1; + } + + platform_set_drvdata(pdev, jornadakbd); + + memcpy(jornadakbd->keymap, jornada_std_keymap, + sizeof(jornada_std_keymap)); + jornadakbd->input = input_dev; + + input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); + input_dev->name = "HP Jornada 720 keyboard"; + input_dev->phys = "jornadakbd/input0"; + input_dev->keycode = jornadakbd->keymap; + input_dev->keycodesize = sizeof(unsigned short); + input_dev->keycodemax = ARRAY_SIZE(jornada_std_keymap); + input_dev->id.bustype = BUS_HOST; + input_dev->dev.parent = &pdev->dev; + + for (i = 0; i < ARRAY_SIZE(jornadakbd->keymap); i++) + __set_bit(jornadakbd->keymap[i], input_dev->keybit); + __clear_bit(KEY_RESERVED, input_dev->keybit); + + input_set_capability(input_dev, EV_MSC, MSC_SCAN); + + err = request_irq(IRQ_GPIO0, + jornada720_kbd_interrupt, + IRQF_DISABLED | IRQF_TRIGGER_FALLING, + "jornadakbd", pdev); + if (err) { + printk(KERN_INFO "jornadakbd720_kbd: Unable to grab IRQ\n"); + goto fail1; + } + + err = input_register_device(jornadakbd->input); + if (err) + goto fail2; + + return 0; + + fail2: /* IRQ, DEVICE, MEMORY */ + free_irq(IRQ_GPIO0, pdev); + fail1: /* DEVICE, MEMORY */ + platform_set_drvdata(pdev, NULL); + input_free_device(input_dev); + kfree(jornadakbd); + return err; +}; + +static int __devexit jornada720_kbd_remove(struct platform_device *pdev) +{ + struct jornadakbd *jornadakbd = platform_get_drvdata(pdev); + + free_irq(IRQ_GPIO0, pdev); + platform_set_drvdata(pdev, NULL); + input_unregister_device(jornadakbd->input); + kfree(jornadakbd); + + return 0; +} + +static struct platform_driver jornada720_kbd_driver = { + .driver = { + .name = "jornada720_kbd", + }, + .probe = jornada720_kbd_probe, + .remove = __devexit_p(jornada720_kbd_remove), +}; + +static int __init jornada720_kbd_init(void) +{ + return platform_driver_register(&jornada720_kbd_driver); +} + +static void __exit jornada720_kbd_exit(void) +{ + platform_driver_unregister(&jornada720_kbd_driver); +} + +module_init(jornada720_kbd_init); +module_exit(jornada720_kbd_exit); diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c new file mode 100644 index 000000000000..2b404284c28a --- /dev/null +++ b/drivers/input/keyboard/maple_keyb.c @@ -0,0 +1,252 @@ +/* + * SEGA Dreamcast keyboard driver + * Based on drivers/usb/usbkbd.c + * Copyright YAEGASHI Takeshi, 2001 + * Porting to 2.6 Copyright Adrian McMenamin, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/input.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/timer.h> +#include <linux/maple.h> +#include <asm/mach/maple.h> + +/* Very simple mutex to ensure proper cleanup */ +static DEFINE_MUTEX(maple_keyb_mutex); + +#define NR_SCANCODES 256 + +MODULE_AUTHOR("YAEGASHI Takeshi, Adrian McMenamin"); +MODULE_DESCRIPTION("SEGA Dreamcast keyboard driver"); +MODULE_LICENSE("GPL"); + +struct dc_kbd { + struct input_dev *dev; + unsigned short keycode[NR_SCANCODES]; + unsigned char new[8]; + unsigned char old[8]; +}; + +static const unsigned short dc_kbd_keycode[NR_SCANCODES] = { + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_A, KEY_B, KEY_C, KEY_D, + KEY_E, KEY_F, KEY_G, KEY_H, KEY_I, KEY_J, KEY_K, KEY_L, + KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T, + KEY_U, KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z, KEY_1, KEY_2, + KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0, + KEY_ENTER, KEY_ESC, KEY_BACKSPACE, KEY_TAB, KEY_SPACE, KEY_MINUS, KEY_EQUAL, KEY_LEFTBRACE, + KEY_RIGHTBRACE, KEY_BACKSLASH, KEY_BACKSLASH, KEY_SEMICOLON, KEY_APOSTROPHE, KEY_GRAVE, KEY_COMMA, + KEY_DOT, KEY_SLASH, KEY_CAPSLOCK, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, + KEY_F7, KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_F12, KEY_SYSRQ, + KEY_SCROLLLOCK, KEY_PAUSE, KEY_INSERT, KEY_HOME, KEY_PAGEUP, KEY_DELETE, + KEY_END, KEY_PAGEDOWN, KEY_RIGHT, KEY_LEFT, KEY_DOWN, KEY_UP, + KEY_NUMLOCK, KEY_KPSLASH, KEY_KPASTERISK, KEY_KPMINUS, KEY_KPPLUS, KEY_KPENTER, KEY_KP1, KEY_KP2, + KEY_KP3, KEY_KP4, KEY_KP5, KEY_KP6, KEY_KP7, KEY_KP8, KEY_KP9, KEY_KP0, KEY_KPDOT, + KEY_102ND, KEY_COMPOSE, KEY_POWER, KEY_KPEQUAL, KEY_F13, KEY_F14, KEY_F15, + KEY_F16, KEY_F17, KEY_F18, KEY_F19, KEY_F20, + KEY_F21, KEY_F22, KEY_F23, KEY_F24, KEY_OPEN, KEY_HELP, KEY_PROPS, KEY_FRONT, + KEY_STOP, KEY_AGAIN, KEY_UNDO, KEY_CUT, KEY_COPY, KEY_PASTE, KEY_FIND, KEY_MUTE, + KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_KPCOMMA, KEY_RESERVED, KEY_RO, KEY_KATAKANAHIRAGANA , KEY_YEN, + KEY_HENKAN, KEY_MUHENKAN, KEY_KPJPCOMMA, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_HANGEUL, KEY_HANJA, KEY_KATAKANA, KEY_HIRAGANA, KEY_ZENKAKUHANKAKU, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, + KEY_LEFTCTRL, KEY_LEFTSHIFT, KEY_LEFTALT, KEY_LEFTMETA, KEY_RIGHTCTRL, KEY_RIGHTSHIFT, KEY_RIGHTALT, KEY_RIGHTMETA, + KEY_PLAYPAUSE, KEY_STOPCD, KEY_PREVIOUSSONG, KEY_NEXTSONG, KEY_EJECTCD, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE, + KEY_WWW, KEY_BACK, KEY_FORWARD, KEY_STOP, KEY_FIND, KEY_SCROLLUP, KEY_SCROLLDOWN, KEY_EDIT, KEY_SLEEP, + KEY_SCREENLOCK, KEY_REFRESH, KEY_CALC, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED +}; + +static void dc_scan_kbd(struct dc_kbd *kbd) +{ + struct input_dev *dev = kbd->dev; + void *ptr; + int code, keycode; + int i; + + for (i = 0; i < 8; i++) { + code = i + 224; + keycode = kbd->keycode[code]; + input_event(dev, EV_MSC, MSC_SCAN, code); + input_report_key(dev, keycode, (kbd->new[0] >> i) & 1); + } + + for (i = 2; i < 8; i++) { + ptr = memchr(kbd->new + 2, kbd->old[i], 6); + code = kbd->old[i]; + if (code > 3 && ptr == NULL) { + keycode = kbd->keycode[code]; + if (keycode) { + input_event(dev, EV_MSC, MSC_SCAN, code); + input_report_key(dev, keycode, 0); + } else + printk(KERN_DEBUG "maple_keyb: " + "Unknown key (scancode %#x) released.", + code); + } + ptr = memchr(kbd->old + 2, kbd->new[i], 6); + code = kbd->new[i]; + if (code > 3 && ptr) { + keycode = kbd->keycode[code]; + if (keycode) { + input_event(dev, EV_MSC, MSC_SCAN, code); + input_report_key(dev, keycode, 1); + } else + printk(KERN_DEBUG "maple_keyb: " + "Unknown key (scancode %#x) pressed.", + code); + } + } + input_sync(dev); + memcpy(kbd->old, kbd->new, 8); +} + +static void dc_kbd_callback(struct mapleq *mq) +{ + struct maple_device *mapledev = mq->dev; + struct dc_kbd *kbd = mapledev->private_data; + unsigned long *buf = mq->recvbuf; + + /* + * We should always be getting the lock because the only + * time it may be locked if driver is in cleanup phase. + */ + if (likely(mutex_trylock(&maple_keyb_mutex))) { + + if (buf[1] == mapledev->function) { + memcpy(kbd->new, buf + 2, 8); + dc_scan_kbd(kbd); + } + + mutex_unlock(&maple_keyb_mutex); + } +} + +static int dc_kbd_connect(struct maple_device *mdev) +{ + int i, error; + struct dc_kbd *kbd; + struct input_dev *dev; + + if (!(mdev->function & MAPLE_FUNC_KEYBOARD)) + return -EINVAL; + + kbd = kzalloc(sizeof(struct dc_kbd), GFP_KERNEL); + dev = input_allocate_device(); + if (!kbd || !dev) { + error = -ENOMEM; + goto fail; + } + + mdev->private_data = kbd; + + kbd->dev = dev; + memcpy(kbd->keycode, dc_kbd_keycode, sizeof(kbd->keycode)); + + dev->name = mdev->product_name; + dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); + dev->keycode = kbd->keycode; + dev->keycodesize = sizeof (unsigned short); + dev->keycodemax = ARRAY_SIZE(kbd->keycode); + dev->id.bustype = BUS_HOST; + dev->dev.parent = &mdev->dev; + + for (i = 0; i < NR_SCANCODES; i++) + __set_bit(dc_kbd_keycode[i], dev->keybit); + __clear_bit(KEY_RESERVED, dev->keybit); + + input_set_capability(dev, EV_MSC, MSC_SCAN); + input_set_drvdata(dev, kbd); + + error = input_register_device(dev); + if (error) + goto fail; + + /* Maple polling is locked to VBLANK - which may be just 50/s */ + maple_getcond_callback(mdev, dc_kbd_callback, HZ/50, MAPLE_FUNC_KEYBOARD); + return 0; + + fail: + input_free_device(dev); + kfree(kbd); + mdev->private_data = NULL; + return error; +} + +static void dc_kbd_disconnect(struct maple_device *mdev) +{ + struct dc_kbd *kbd; + + mutex_lock(&maple_keyb_mutex); + + kbd = mdev->private_data; + mdev->private_data = NULL; + input_unregister_device(kbd->dev); + kfree(kbd); + + mutex_unlock(&maple_keyb_mutex); +} + +/* allow the keyboard to be used */ +static int probe_maple_kbd(struct device *dev) +{ + struct maple_device *mdev = to_maple_dev(dev); + struct maple_driver *mdrv = to_maple_driver(dev->driver); + int error; + + error = dc_kbd_connect(mdev); + if (error) + return error; + + mdev->driver = mdrv; + mdev->registered = 1; + + return 0; +} + +static struct maple_driver dc_kbd_driver = { + .function = MAPLE_FUNC_KEYBOARD, + .connect = dc_kbd_connect, + .disconnect = dc_kbd_disconnect, + .drv = { + .name = "Dreamcast_keyboard", + .probe = probe_maple_kbd, + }, +}; + +static int __init dc_kbd_init(void) +{ + return maple_driver_register(&dc_kbd_driver.drv); +} + +static void __exit dc_kbd_exit(void) +{ + driver_unregister(&dc_kbd_driver.drv); +} + +module_init(dc_kbd_init); +module_exit(dc_kbd_exit); diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index 3a228634f101..76f1969552c5 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -233,7 +233,7 @@ static void omap_kp_tasklet(unsigned long data) omap_writew(0, OMAP_MPUIO_BASE + OMAP_MPUIO_KBD_MASKIT); kp_cur_group = -1; } - } + } } static ssize_t omap_kp_enable_show(struct device *dev, @@ -318,7 +318,7 @@ static int __init omap_kp_probe(struct platform_device *pdev) keymap = pdata->keymap; if (pdata->rep) - set_bit(EV_REP, input_dev->evbit); + __set_bit(EV_REP, input_dev->evbit); if (pdata->delay) omap_kp->delay = pdata->delay; @@ -365,9 +365,9 @@ static int __init omap_kp_probe(struct platform_device *pdev) goto err2; /* setup input device */ - set_bit(EV_KEY, input_dev->evbit); + __set_bit(EV_KEY, input_dev->evbit); for (i = 0; keymap[i] != 0; i++) - set_bit(keymap[i] & KEY_MAX, input_dev->keybit); + __set_bit(keymap[i] & KEY_MAX, input_dev->keybit); input_dev->name = "omap-keypad"; input_dev->phys = "omap-keypad/input0"; input_dev->dev.parent = &pdev->dev; @@ -377,10 +377,6 @@ static int __init omap_kp_probe(struct platform_device *pdev) input_dev->id.product = 0x0001; input_dev->id.version = 0x0100; - input_dev->keycode = keymap; - input_dev->keycodesize = sizeof(unsigned int); - input_dev->keycodemax = pdata->keymapsize; - ret = input_register_device(omap_kp->input); if (ret < 0) { printk(KERN_ERR "Unable to register omap-keypad input device\n"); @@ -403,15 +399,15 @@ static int __init omap_kp_probe(struct platform_device *pdev) } else { for (irq_idx = 0; irq_idx < omap_kp->rows; irq_idx++) { if (request_irq(OMAP_GPIO_IRQ(row_gpios[irq_idx]), - omap_kp_interrupt, + omap_kp_interrupt, IRQF_TRIGGER_FALLING, - "omap-keypad", omap_kp) < 0) + "omap-keypad", omap_kp) < 0) goto err5; } } return 0; err5: - for (i = irq_idx-1; i >=0; i--) + for (i = irq_idx - 1; i >=0; i--) free_irq(row_gpios[i], 0); err4: input_unregister_device(omap_kp->input); @@ -440,9 +436,9 @@ static int omap_kp_remove(struct platform_device *pdev) if (cpu_is_omap24xx()) { int i; for (i = 0; i < omap_kp->cols; i++) - omap_free_gpio(col_gpios[i]); + omap_free_gpio(col_gpios[i]); for (i = 0; i < omap_kp->rows; i++) { - omap_free_gpio(row_gpios[i]); + omap_free_gpio(row_gpios[i]); free_irq(OMAP_GPIO_IRQ(row_gpios[i]), 0); } } else { diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 2c5f11a4f6b4..64d70a9b714c 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -48,11 +48,13 @@ static const struct alps_model_info alps_model_data[] = { { { 0x63, 0x02, 0x50 }, 0xef, 0xef, ALPS_FW_BK_1 }, /* NEC Versa L320 */ { { 0x63, 0x02, 0x64 }, 0xf8, 0xf8, 0 }, { { 0x63, 0x03, 0xc8 }, 0xf8, 0xf8, ALPS_PASS }, /* Dell Latitude D800 */ + { { 0x73, 0x00, 0x0a }, 0xf8, 0xf8, ALPS_DUALPOINT }, /* ThinkPad R61 8918-5QG */ { { 0x73, 0x02, 0x0a }, 0xf8, 0xf8, 0 }, { { 0x73, 0x02, 0x14 }, 0xf8, 0xf8, ALPS_FW_BK_2 }, /* Ahtec Laptop */ { { 0x20, 0x02, 0x0e }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, /* XXX */ { { 0x22, 0x02, 0x0a }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, { { 0x22, 0x02, 0x14 }, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT }, /* Dell Latitude D600 */ + { { 0x73, 0x02, 0x50 }, 0xcf, 0xff, ALPS_FW_BK_1 } /* Dell Vostro 1400 */ }; /* diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index a1804bfdbb8c..0117817bf538 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -502,18 +502,23 @@ static void atp_complete(struct urb* urb) /* reset the accumulator on release */ memset(dev->xy_acc, 0, sizeof(dev->xy_acc)); + } + + /* Geyser 3 will continue to send packets continually after + the first touch unless reinitialised. Do so if it's been + idle for a while in order to avoid waking the kernel up + several hundred times a second */ - /* Geyser 3 will continue to send packets continually after - the first touch unless reinitialised. Do so if it's been - idle for a while in order to avoid waking the kernel up - several hundred times a second */ - if (!key && atp_is_geyser_3(dev)) { + if (atp_is_geyser_3(dev)) { + if (!x && !y && !key) { dev->idlecount++; if (dev->idlecount == 10) { dev->valid = 0; schedule_work(&dev->work); } } + else + dev->idlecount = 0; } input_report_key(dev->input, BTN_LEFT, key); diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c index 608674d0be8b..d7de4c53b3d8 100644 --- a/drivers/input/mouse/lifebook.c +++ b/drivers/input/mouse/lifebook.c @@ -97,6 +97,14 @@ static const struct dmi_system_id lifebook_dmi_table[] = { .callback = lifebook_set_6byte_proto, }, { + .ident = "CF-72", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "CF-72"), + }, + .callback = lifebook_set_serio_phys, + .driver_data = "isa0060/serio3", + }, + { .ident = "Lifebook B142", .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook B142"), @@ -282,7 +290,7 @@ static int lifebook_create_relative_device(struct psmouse *psmouse) int lifebook_init(struct psmouse *psmouse) { struct input_dev *dev1 = psmouse->dev; - int max_coord = lifebook_use_6byte_proto ? 1024 : 4096; + int max_coord = lifebook_use_6byte_proto ? 4096 : 1024; if (lifebook_absolute_mode(psmouse)) return -1; diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index b9f0fb2530e2..073525756532 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -648,9 +648,10 @@ static int psmouse_extensions(struct psmouse *psmouse, /* * Reset to defaults in case the device got confused by extended - * protocol probes. Note that we do full reset becuase some mice - * put themselves to sleep when see PSMOUSE_RESET_DIS. + * protocol probes. Note that we follow up with full reset because + * some mice put themselves to sleep when they see PSMOUSE_RESET_DIS. */ + ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); psmouse_reset(psmouse); if (max_proto >= PSMOUSE_IMEX && im_explorer_detect(psmouse, set_properties) == 0) diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index 9173916b8be5..79146d6ed2ab 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -61,9 +61,11 @@ struct mousedev { int open; int minor; char name[16]; + struct input_handle handle; wait_queue_head_t wait; struct list_head client_list; - struct input_handle handle; + spinlock_t client_lock; /* protects client_list */ + struct mutex mutex; struct device dev; struct list_head mixdev_node; @@ -113,108 +115,137 @@ static unsigned char mousedev_imex_seq[] = { 0xf3, 200, 0xf3, 200, 0xf3, 80 }; static struct input_handler mousedev_handler; static struct mousedev *mousedev_table[MOUSEDEV_MINORS]; +static DEFINE_MUTEX(mousedev_table_mutex); static struct mousedev *mousedev_mix; static LIST_HEAD(mousedev_mix_list); +static void mixdev_open_devices(void); +static void mixdev_close_devices(void); + #define fx(i) (mousedev->old_x[(mousedev->pkt_count - (i)) & 03]) #define fy(i) (mousedev->old_y[(mousedev->pkt_count - (i)) & 03]) -static void mousedev_touchpad_event(struct input_dev *dev, struct mousedev *mousedev, unsigned int code, int value) +static void mousedev_touchpad_event(struct input_dev *dev, + struct mousedev *mousedev, + unsigned int code, int value) { int size, tmp; enum { FRACTION_DENOM = 128 }; switch (code) { - case ABS_X: - fx(0) = value; - if (mousedev->touch && mousedev->pkt_count >= 2) { - size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; - if (size == 0) - size = 256 * 2; - tmp = ((value - fx(2)) * (256 * FRACTION_DENOM)) / size; - tmp += mousedev->frac_dx; - mousedev->packet.dx = tmp / FRACTION_DENOM; - mousedev->frac_dx = tmp - mousedev->packet.dx * FRACTION_DENOM; - } - break; - case ABS_Y: - fy(0) = value; - if (mousedev->touch && mousedev->pkt_count >= 2) { - /* use X size to keep the same scale */ - size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; - if (size == 0) - size = 256 * 2; - tmp = -((value - fy(2)) * (256 * FRACTION_DENOM)) / size; - tmp += mousedev->frac_dy; - mousedev->packet.dy = tmp / FRACTION_DENOM; - mousedev->frac_dy = tmp - mousedev->packet.dy * FRACTION_DENOM; - } - break; + case ABS_X: + fx(0) = value; + if (mousedev->touch && mousedev->pkt_count >= 2) { + size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; + if (size == 0) + size = 256 * 2; + tmp = ((value - fx(2)) * 256 * FRACTION_DENOM) / size; + tmp += mousedev->frac_dx; + mousedev->packet.dx = tmp / FRACTION_DENOM; + mousedev->frac_dx = + tmp - mousedev->packet.dx * FRACTION_DENOM; + } + break; + + case ABS_Y: + fy(0) = value; + if (mousedev->touch && mousedev->pkt_count >= 2) { + /* use X size to keep the same scale */ + size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; + if (size == 0) + size = 256 * 2; + tmp = -((value - fy(2)) * 256 * FRACTION_DENOM) / size; + tmp += mousedev->frac_dy; + mousedev->packet.dy = tmp / FRACTION_DENOM; + mousedev->frac_dy = tmp - + mousedev->packet.dy * FRACTION_DENOM; + } + break; } } -static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev, unsigned int code, int value) +static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev, + unsigned int code, int value) { int size; switch (code) { - case ABS_X: - size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; - if (size == 0) - size = xres ? : 1; - if (value > dev->absmax[ABS_X]) - value = dev->absmax[ABS_X]; - if (value < dev->absmin[ABS_X]) - value = dev->absmin[ABS_X]; - mousedev->packet.x = ((value - dev->absmin[ABS_X]) * xres) / size; - mousedev->packet.abs_event = 1; - break; - case ABS_Y: - size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y]; - if (size == 0) - size = yres ? : 1; - if (value > dev->absmax[ABS_Y]) - value = dev->absmax[ABS_Y]; - if (value < dev->absmin[ABS_Y]) - value = dev->absmin[ABS_Y]; - mousedev->packet.y = yres - ((value - dev->absmin[ABS_Y]) * yres) / size; - mousedev->packet.abs_event = 1; - break; + case ABS_X: + size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; + if (size == 0) + size = xres ? : 1; + if (value > dev->absmax[ABS_X]) + value = dev->absmax[ABS_X]; + if (value < dev->absmin[ABS_X]) + value = dev->absmin[ABS_X]; + mousedev->packet.x = + ((value - dev->absmin[ABS_X]) * xres) / size; + mousedev->packet.abs_event = 1; + break; + + case ABS_Y: + size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y]; + if (size == 0) + size = yres ? : 1; + if (value > dev->absmax[ABS_Y]) + value = dev->absmax[ABS_Y]; + if (value < dev->absmin[ABS_Y]) + value = dev->absmin[ABS_Y]; + mousedev->packet.y = yres - + ((value - dev->absmin[ABS_Y]) * yres) / size; + mousedev->packet.abs_event = 1; + break; } } -static void mousedev_rel_event(struct mousedev *mousedev, unsigned int code, int value) +static void mousedev_rel_event(struct mousedev *mousedev, + unsigned int code, int value) { switch (code) { - case REL_X: mousedev->packet.dx += value; break; - case REL_Y: mousedev->packet.dy -= value; break; - case REL_WHEEL: mousedev->packet.dz -= value; break; + case REL_X: + mousedev->packet.dx += value; + break; + + case REL_Y: + mousedev->packet.dy -= value; + break; + + case REL_WHEEL: + mousedev->packet.dz -= value; + break; } } -static void mousedev_key_event(struct mousedev *mousedev, unsigned int code, int value) +static void mousedev_key_event(struct mousedev *mousedev, + unsigned int code, int value) { int index; switch (code) { - case BTN_TOUCH: - case BTN_0: - case BTN_LEFT: index = 0; break; - case BTN_STYLUS: - case BTN_1: - case BTN_RIGHT: index = 1; break; - case BTN_2: - case BTN_FORWARD: - case BTN_STYLUS2: - case BTN_MIDDLE: index = 2; break; - case BTN_3: - case BTN_BACK: - case BTN_SIDE: index = 3; break; - case BTN_4: - case BTN_EXTRA: index = 4; break; - default: return; + + case BTN_TOUCH: + case BTN_0: + case BTN_LEFT: index = 0; break; + + case BTN_STYLUS: + case BTN_1: + case BTN_RIGHT: index = 1; break; + + case BTN_2: + case BTN_FORWARD: + case BTN_STYLUS2: + case BTN_MIDDLE: index = 2; break; + + case BTN_3: + case BTN_BACK: + case BTN_SIDE: index = 3; break; + + case BTN_4: + case BTN_EXTRA: index = 4; break; + + default: return; } if (value) { @@ -226,19 +257,23 @@ static void mousedev_key_event(struct mousedev *mousedev, unsigned int code, int } } -static void mousedev_notify_readers(struct mousedev *mousedev, struct mousedev_hw_data *packet) +static void mousedev_notify_readers(struct mousedev *mousedev, + struct mousedev_hw_data *packet) { struct mousedev_client *client; struct mousedev_motion *p; - unsigned long flags; + unsigned int new_head; int wake_readers = 0; - list_for_each_entry(client, &mousedev->client_list, node) { - spin_lock_irqsave(&client->packet_lock, flags); + rcu_read_lock(); + list_for_each_entry_rcu(client, &mousedev->client_list, node) { + + /* Just acquire the lock, interrupts already disabled */ + spin_lock(&client->packet_lock); p = &client->packets[client->head]; if (client->ready && p->buttons != mousedev->packet.buttons) { - unsigned int new_head = (client->head + 1) % PACKET_QUEUE_LEN; + new_head = (client->head + 1) % PACKET_QUEUE_LEN; if (new_head != client->tail) { p = &client->packets[client->head = new_head]; memset(p, 0, sizeof(struct mousedev_motion)); @@ -253,25 +288,29 @@ static void mousedev_notify_readers(struct mousedev *mousedev, struct mousedev_h } client->pos_x += packet->dx; - client->pos_x = client->pos_x < 0 ? 0 : (client->pos_x >= xres ? xres : client->pos_x); + client->pos_x = client->pos_x < 0 ? + 0 : (client->pos_x >= xres ? xres : client->pos_x); client->pos_y += packet->dy; - client->pos_y = client->pos_y < 0 ? 0 : (client->pos_y >= yres ? yres : client->pos_y); + client->pos_y = client->pos_y < 0 ? + 0 : (client->pos_y >= yres ? yres : client->pos_y); p->dx += packet->dx; p->dy += packet->dy; p->dz += packet->dz; p->buttons = mousedev->packet.buttons; - if (p->dx || p->dy || p->dz || p->buttons != client->last_buttons) + if (p->dx || p->dy || p->dz || + p->buttons != client->last_buttons) client->ready = 1; - spin_unlock_irqrestore(&client->packet_lock, flags); + spin_unlock(&client->packet_lock); if (client->ready) { kill_fasync(&client->fasync, SIGIO, POLL_IN); wake_readers = 1; } } + rcu_read_unlock(); if (wake_readers) wake_up_interruptible(&mousedev->wait); @@ -281,7 +320,8 @@ static void mousedev_touchpad_touch(struct mousedev *mousedev, int value) { if (!value) { if (mousedev->touch && - time_before(jiffies, mousedev->touch + msecs_to_jiffies(tap_time))) { + time_before(jiffies, + mousedev->touch + msecs_to_jiffies(tap_time))) { /* * Toggle left button to emulate tap. * We rely on the fact that mousedev_mix always has 0 @@ -290,7 +330,8 @@ static void mousedev_touchpad_touch(struct mousedev *mousedev, int value) set_bit(0, &mousedev->packet.buttons); set_bit(0, &mousedev_mix->packet.buttons); mousedev_notify_readers(mousedev, &mousedev_mix->packet); - mousedev_notify_readers(mousedev_mix, &mousedev_mix->packet); + mousedev_notify_readers(mousedev_mix, + &mousedev_mix->packet); clear_bit(0, &mousedev->packet.buttons); clear_bit(0, &mousedev_mix->packet.buttons); } @@ -302,54 +343,61 @@ static void mousedev_touchpad_touch(struct mousedev *mousedev, int value) mousedev->touch = jiffies; } -static void mousedev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) +static void mousedev_event(struct input_handle *handle, + unsigned int type, unsigned int code, int value) { struct mousedev *mousedev = handle->private; switch (type) { - case EV_ABS: - /* Ignore joysticks */ - if (test_bit(BTN_TRIGGER, handle->dev->keybit)) - return; - if (test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) - mousedev_touchpad_event(handle->dev, mousedev, code, value); - else - mousedev_abs_event(handle->dev, mousedev, code, value); + case EV_ABS: + /* Ignore joysticks */ + if (test_bit(BTN_TRIGGER, handle->dev->keybit)) + return; - break; + if (test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) + mousedev_touchpad_event(handle->dev, + mousedev, code, value); + else + mousedev_abs_event(handle->dev, mousedev, code, value); - case EV_REL: - mousedev_rel_event(mousedev, code, value); - break; + break; - case EV_KEY: - if (value != 2) { - if (code == BTN_TOUCH && test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) - mousedev_touchpad_touch(mousedev, value); - else - mousedev_key_event(mousedev, code, value); - } - break; + case EV_REL: + mousedev_rel_event(mousedev, code, value); + break; - case EV_SYN: - if (code == SYN_REPORT) { - if (mousedev->touch) { - mousedev->pkt_count++; - /* Input system eats duplicate events, but we need all of them - * to do correct averaging so apply present one forward - */ - fx(0) = fx(1); - fy(0) = fy(1); - } - - mousedev_notify_readers(mousedev, &mousedev->packet); - mousedev_notify_readers(mousedev_mix, &mousedev->packet); - - mousedev->packet.dx = mousedev->packet.dy = mousedev->packet.dz = 0; - mousedev->packet.abs_event = 0; + case EV_KEY: + if (value != 2) { + if (code == BTN_TOUCH && + test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) + mousedev_touchpad_touch(mousedev, value); + else + mousedev_key_event(mousedev, code, value); + } + break; + + case EV_SYN: + if (code == SYN_REPORT) { + if (mousedev->touch) { + mousedev->pkt_count++; + /* + * Input system eats duplicate events, + * but we need all of them to do correct + * averaging so apply present one forward + */ + fx(0) = fx(1); + fy(0) = fy(1); } - break; + + mousedev_notify_readers(mousedev, &mousedev->packet); + mousedev_notify_readers(mousedev_mix, &mousedev->packet); + + mousedev->packet.dx = mousedev->packet.dy = + mousedev->packet.dz = 0; + mousedev->packet.abs_event = 0; + } + break; } } @@ -367,41 +415,48 @@ static void mousedev_free(struct device *dev) { struct mousedev *mousedev = container_of(dev, struct mousedev, dev); - mousedev_table[mousedev->minor] = NULL; kfree(mousedev); } -static int mixdev_add_device(struct mousedev *mousedev) +static int mousedev_open_device(struct mousedev *mousedev) { - int error; + int retval; - if (mousedev_mix->open) { - error = input_open_device(&mousedev->handle); - if (error) - return error; + retval = mutex_lock_interruptible(&mousedev->mutex); + if (retval) + return retval; - mousedev->open++; - mousedev->mixdev_open = 1; + if (mousedev->minor == MOUSEDEV_MIX) + mixdev_open_devices(); + else if (!mousedev->exist) + retval = -ENODEV; + else if (!mousedev->open++) { + retval = input_open_device(&mousedev->handle); + if (retval) + mousedev->open--; } - get_device(&mousedev->dev); - list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list); - - return 0; + mutex_unlock(&mousedev->mutex); + return retval; } -static void mixdev_remove_device(struct mousedev *mousedev) +static void mousedev_close_device(struct mousedev *mousedev) { - if (mousedev->mixdev_open) { - mousedev->mixdev_open = 0; - if (!--mousedev->open && mousedev->exist) - input_close_device(&mousedev->handle); - } + mutex_lock(&mousedev->mutex); - list_del_init(&mousedev->mixdev_node); - put_device(&mousedev->dev); + if (mousedev->minor == MOUSEDEV_MIX) + mixdev_close_devices(); + else if (mousedev->exist && !--mousedev->open) + input_close_device(&mousedev->handle); + + mutex_unlock(&mousedev->mutex); } +/* + * Open all available devices so they can all be multiplexed in one. + * stream. Note that this function is called with mousedev_mix->mutex + * held. + */ static void mixdev_open_devices(void) { struct mousedev *mousedev; @@ -411,16 +466,19 @@ static void mixdev_open_devices(void) list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { if (!mousedev->mixdev_open) { - if (!mousedev->open && mousedev->exist) - if (input_open_device(&mousedev->handle)) - continue; + if (mousedev_open_device(mousedev)) + continue; - mousedev->open++; mousedev->mixdev_open = 1; } } } +/* + * Close all devices that were opened as part of multiplexed + * device. Note that this function is called with mousedev_mix->mutex + * held. + */ static void mixdev_close_devices(void) { struct mousedev *mousedev; @@ -431,33 +489,45 @@ static void mixdev_close_devices(void) list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { if (mousedev->mixdev_open) { mousedev->mixdev_open = 0; - if (!--mousedev->open && mousedev->exist) - input_close_device(&mousedev->handle); + mousedev_close_device(mousedev); } } } + +static void mousedev_attach_client(struct mousedev *mousedev, + struct mousedev_client *client) +{ + spin_lock(&mousedev->client_lock); + list_add_tail_rcu(&client->node, &mousedev->client_list); + spin_unlock(&mousedev->client_lock); + synchronize_rcu(); +} + +static void mousedev_detach_client(struct mousedev *mousedev, + struct mousedev_client *client) +{ + spin_lock(&mousedev->client_lock); + list_del_rcu(&client->node); + spin_unlock(&mousedev->client_lock); + synchronize_rcu(); +} + static int mousedev_release(struct inode *inode, struct file *file) { struct mousedev_client *client = file->private_data; struct mousedev *mousedev = client->mousedev; mousedev_fasync(-1, file, 0); - - list_del(&client->node); + mousedev_detach_client(mousedev, client); kfree(client); - if (mousedev->minor == MOUSEDEV_MIX) - mixdev_close_devices(); - else if (!--mousedev->open && mousedev->exist) - input_close_device(&mousedev->handle); - + mousedev_close_device(mousedev); put_device(&mousedev->dev); return 0; } - static int mousedev_open(struct inode *inode, struct file *file) { struct mousedev_client *client; @@ -475,12 +545,17 @@ static int mousedev_open(struct inode *inode, struct file *file) if (i >= MOUSEDEV_MINORS) return -ENODEV; + error = mutex_lock_interruptible(&mousedev_table_mutex); + if (error) + return error; mousedev = mousedev_table[i]; + if (mousedev) + get_device(&mousedev->dev); + mutex_unlock(&mousedev_table_mutex); + if (!mousedev) return -ENODEV; - get_device(&mousedev->dev); - client = kzalloc(sizeof(struct mousedev_client), GFP_KERNEL); if (!client) { error = -ENOMEM; @@ -491,21 +566,17 @@ static int mousedev_open(struct inode *inode, struct file *file) client->pos_x = xres / 2; client->pos_y = yres / 2; client->mousedev = mousedev; - list_add_tail(&client->node, &mousedev->client_list); + mousedev_attach_client(mousedev, client); - if (mousedev->minor == MOUSEDEV_MIX) - mixdev_open_devices(); - else if (!mousedev->open++ && mousedev->exist) { - error = input_open_device(&mousedev->handle); - if (error) - goto err_free_client; - } + error = mousedev_open_device(mousedev); + if (error) + goto err_free_client; file->private_data = client; return 0; err_free_client: - list_del(&client->node); + mousedev_detach_client(mousedev, client); kfree(client); err_put_mousedev: put_device(&mousedev->dev); @@ -517,41 +588,41 @@ static inline int mousedev_limit_delta(int delta, int limit) return delta > limit ? limit : (delta < -limit ? -limit : delta); } -static void mousedev_packet(struct mousedev_client *client, signed char *ps2_data) +static void mousedev_packet(struct mousedev_client *client, + signed char *ps2_data) { - struct mousedev_motion *p; - unsigned long flags; - - spin_lock_irqsave(&client->packet_lock, flags); - p = &client->packets[client->tail]; + struct mousedev_motion *p = &client->packets[client->tail]; - ps2_data[0] = 0x08 | ((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07); + ps2_data[0] = 0x08 | + ((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07); ps2_data[1] = mousedev_limit_delta(p->dx, 127); ps2_data[2] = mousedev_limit_delta(p->dy, 127); p->dx -= ps2_data[1]; p->dy -= ps2_data[2]; switch (client->mode) { - case MOUSEDEV_EMUL_EXPS: - ps2_data[3] = mousedev_limit_delta(p->dz, 7); - p->dz -= ps2_data[3]; - ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1); - client->bufsiz = 4; - break; - - case MOUSEDEV_EMUL_IMPS: - ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); - ps2_data[3] = mousedev_limit_delta(p->dz, 127); - p->dz -= ps2_data[3]; - client->bufsiz = 4; - break; - - case MOUSEDEV_EMUL_PS2: - default: - ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); - p->dz = 0; - client->bufsiz = 3; - break; + case MOUSEDEV_EMUL_EXPS: + ps2_data[3] = mousedev_limit_delta(p->dz, 7); + p->dz -= ps2_data[3]; + ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1); + client->bufsiz = 4; + break; + + case MOUSEDEV_EMUL_IMPS: + ps2_data[0] |= + ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); + ps2_data[3] = mousedev_limit_delta(p->dz, 127); + p->dz -= ps2_data[3]; + client->bufsiz = 4; + break; + + case MOUSEDEV_EMUL_PS2: + default: + ps2_data[0] |= + ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); + p->dz = 0; + client->bufsiz = 3; + break; } if (!p->dx && !p->dy && !p->dz) { @@ -561,12 +632,56 @@ static void mousedev_packet(struct mousedev_client *client, signed char *ps2_dat } else client->tail = (client->tail + 1) % PACKET_QUEUE_LEN; } - - spin_unlock_irqrestore(&client->packet_lock, flags); } +static void mousedev_generate_response(struct mousedev_client *client, + int command) +{ + client->ps2[0] = 0xfa; /* ACK */ + + switch (command) { -static ssize_t mousedev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) + case 0xeb: /* Poll */ + mousedev_packet(client, &client->ps2[1]); + client->bufsiz++; /* account for leading ACK */ + break; + + case 0xf2: /* Get ID */ + switch (client->mode) { + case MOUSEDEV_EMUL_PS2: + client->ps2[1] = 0; + break; + case MOUSEDEV_EMUL_IMPS: + client->ps2[1] = 3; + break; + case MOUSEDEV_EMUL_EXPS: + client->ps2[1] = 4; + break; + } + client->bufsiz = 2; + break; + + case 0xe9: /* Get info */ + client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200; + client->bufsiz = 4; + break; + + case 0xff: /* Reset */ + client->impsseq = client->imexseq = 0; + client->mode = MOUSEDEV_EMUL_PS2; + client->ps2[1] = 0xaa; client->ps2[2] = 0x00; + client->bufsiz = 3; + break; + + default: + client->bufsiz = 1; + break; + } + client->buffer = client->bufsiz; +} + +static ssize_t mousedev_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { struct mousedev_client *client = file->private_data; unsigned char c; @@ -577,6 +692,8 @@ static ssize_t mousedev_write(struct file *file, const char __user *buffer, size if (get_user(c, buffer + i)) return -EFAULT; + spin_lock_irq(&client->packet_lock); + if (c == mousedev_imex_seq[client->imexseq]) { if (++client->imexseq == MOUSEDEV_SEQ_LEN) { client->imexseq = 0; @@ -593,68 +710,39 @@ static ssize_t mousedev_write(struct file *file, const char __user *buffer, size } else client->impsseq = 0; - client->ps2[0] = 0xfa; - - switch (c) { - - case 0xeb: /* Poll */ - mousedev_packet(client, &client->ps2[1]); - client->bufsiz++; /* account for leading ACK */ - break; - - case 0xf2: /* Get ID */ - switch (client->mode) { - case MOUSEDEV_EMUL_PS2: client->ps2[1] = 0; break; - case MOUSEDEV_EMUL_IMPS: client->ps2[1] = 3; break; - case MOUSEDEV_EMUL_EXPS: client->ps2[1] = 4; break; - } - client->bufsiz = 2; - break; - - case 0xe9: /* Get info */ - client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200; - client->bufsiz = 4; - break; - - case 0xff: /* Reset */ - client->impsseq = client->imexseq = 0; - client->mode = MOUSEDEV_EMUL_PS2; - client->ps2[1] = 0xaa; client->ps2[2] = 0x00; - client->bufsiz = 3; - break; - - default: - client->bufsiz = 1; - break; - } + mousedev_generate_response(client, c); - client->buffer = client->bufsiz; + spin_unlock_irq(&client->packet_lock); } kill_fasync(&client->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&client->mousedev->wait); return count; } -static ssize_t mousedev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +static ssize_t mousedev_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) { struct mousedev_client *client = file->private_data; + struct mousedev *mousedev = client->mousedev; + signed char data[sizeof(client->ps2)]; int retval = 0; - if (!client->ready && !client->buffer && (file->f_flags & O_NONBLOCK)) + if (!client->ready && !client->buffer && mousedev->exist && + (file->f_flags & O_NONBLOCK)) return -EAGAIN; - retval = wait_event_interruptible(client->mousedev->wait, - !client->mousedev->exist || client->ready || client->buffer); - + retval = wait_event_interruptible(mousedev->wait, + !mousedev->exist || client->ready || client->buffer); if (retval) return retval; - if (!client->mousedev->exist) + if (!mousedev->exist) return -ENODEV; + spin_lock_irq(&client->packet_lock); + if (!client->buffer && client->ready) { mousedev_packet(client, client->ps2); client->buffer = client->bufsiz; @@ -663,9 +751,12 @@ static ssize_t mousedev_read(struct file *file, char __user *buffer, size_t coun if (count > client->buffer) count = client->buffer; + memcpy(data, client->ps2 + client->bufsiz - client->buffer, count); client->buffer -= count; - if (copy_to_user(buffer, client->ps2 + client->bufsiz - client->buffer - count, count)) + spin_unlock_irq(&client->packet_lock); + + if (copy_to_user(buffer, data, count)) return -EFAULT; return count; @@ -692,6 +783,60 @@ static const struct file_operations mousedev_fops = { .fasync = mousedev_fasync, }; +static int mousedev_install_chrdev(struct mousedev *mousedev) +{ + mousedev_table[mousedev->minor] = mousedev; + return 0; +} + +static void mousedev_remove_chrdev(struct mousedev *mousedev) +{ + mutex_lock(&mousedev_table_mutex); + mousedev_table[mousedev->minor] = NULL; + mutex_unlock(&mousedev_table_mutex); +} + +/* + * Mark device non-existent. This disables writes, ioctls and + * prevents new users from opening the device. Already posted + * blocking reads will stay, however new ones will fail. + */ +static void mousedev_mark_dead(struct mousedev *mousedev) +{ + mutex_lock(&mousedev->mutex); + mousedev->exist = 0; + mutex_unlock(&mousedev->mutex); +} + +/* + * Wake up users waiting for IO so they can disconnect from + * dead device. + */ +static void mousedev_hangup(struct mousedev *mousedev) +{ + struct mousedev_client *client; + + spin_lock(&mousedev->client_lock); + list_for_each_entry(client, &mousedev->client_list, node) + kill_fasync(&client->fasync, SIGIO, POLL_HUP); + spin_unlock(&mousedev->client_lock); + + wake_up_interruptible(&mousedev->wait); +} + +static void mousedev_cleanup(struct mousedev *mousedev) +{ + struct input_handle *handle = &mousedev->handle; + + mousedev_mark_dead(mousedev); + mousedev_hangup(mousedev); + mousedev_remove_chrdev(mousedev); + + /* mousedev is marked dead so no one else accesses mousedev->open */ + if (mousedev->open) + input_close_device(handle); +} + static struct mousedev *mousedev_create(struct input_dev *dev, struct input_handler *handler, int minor) @@ -707,6 +852,10 @@ static struct mousedev *mousedev_create(struct input_dev *dev, INIT_LIST_HEAD(&mousedev->client_list); INIT_LIST_HEAD(&mousedev->mixdev_node); + spin_lock_init(&mousedev->client_lock); + mutex_init(&mousedev->mutex); + lockdep_set_subclass(&mousedev->mutex, + minor == MOUSEDEV_MIX ? MOUSEDEV_MIX : 0); init_waitqueue_head(&mousedev->wait); if (minor == MOUSEDEV_MIX) @@ -731,14 +880,27 @@ static struct mousedev *mousedev_create(struct input_dev *dev, mousedev->dev.release = mousedev_free; device_initialize(&mousedev->dev); - mousedev_table[minor] = mousedev; + if (minor != MOUSEDEV_MIX) { + error = input_register_handle(&mousedev->handle); + if (error) + goto err_free_mousedev; + } + + error = mousedev_install_chrdev(mousedev); + if (error) + goto err_unregister_handle; error = device_add(&mousedev->dev); if (error) - goto err_free_mousedev; + goto err_cleanup_mousedev; return mousedev; + err_cleanup_mousedev: + mousedev_cleanup(mousedev); + err_unregister_handle: + if (minor != MOUSEDEV_MIX) + input_unregister_handle(&mousedev->handle); err_free_mousedev: put_device(&mousedev->dev); err_out: @@ -747,29 +909,64 @@ static struct mousedev *mousedev_create(struct input_dev *dev, static void mousedev_destroy(struct mousedev *mousedev) { - struct mousedev_client *client; - device_del(&mousedev->dev); - mousedev->exist = 0; + mousedev_cleanup(mousedev); + if (mousedev->minor != MOUSEDEV_MIX) + input_unregister_handle(&mousedev->handle); + put_device(&mousedev->dev); +} - if (mousedev->open) { - input_close_device(&mousedev->handle); - list_for_each_entry(client, &mousedev->client_list, node) - kill_fasync(&client->fasync, SIGIO, POLL_HUP); - wake_up_interruptible(&mousedev->wait); +static int mixdev_add_device(struct mousedev *mousedev) +{ + int retval; + + retval = mutex_lock_interruptible(&mousedev_mix->mutex); + if (retval) + return retval; + + if (mousedev_mix->open) { + retval = mousedev_open_device(mousedev); + if (retval) + goto out; + + mousedev->mixdev_open = 1; } + get_device(&mousedev->dev); + list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list); + + out: + mutex_unlock(&mousedev_mix->mutex); + return retval; +} + +static void mixdev_remove_device(struct mousedev *mousedev) +{ + mutex_lock(&mousedev_mix->mutex); + + if (mousedev->mixdev_open) { + mousedev->mixdev_open = 0; + mousedev_close_device(mousedev); + } + + list_del_init(&mousedev->mixdev_node); + mutex_unlock(&mousedev_mix->mutex); + put_device(&mousedev->dev); } -static int mousedev_connect(struct input_handler *handler, struct input_dev *dev, +static int mousedev_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) { struct mousedev *mousedev; int minor; int error; - for (minor = 0; minor < MOUSEDEV_MINORS && mousedev_table[minor]; minor++); + for (minor = 0; minor < MOUSEDEV_MINORS; minor++) + if (!mousedev_table[minor]) + break; + if (minor == MOUSEDEV_MINORS) { printk(KERN_ERR "mousedev: no more free mousedev devices\n"); return -ENFILE; @@ -779,21 +976,13 @@ static int mousedev_connect(struct input_handler *handler, struct input_dev *dev if (IS_ERR(mousedev)) return PTR_ERR(mousedev); - error = input_register_handle(&mousedev->handle); - if (error) - goto err_delete_mousedev; - error = mixdev_add_device(mousedev); - if (error) - goto err_unregister_handle; + if (error) { + mousedev_destroy(mousedev); + return error; + } return 0; - - err_unregister_handle: - input_unregister_handle(&mousedev->handle); - err_delete_mousedev: - device_unregister(&mousedev->dev); - return error; } static void mousedev_disconnect(struct input_handle *handle) @@ -801,33 +990,42 @@ static void mousedev_disconnect(struct input_handle *handle) struct mousedev *mousedev = handle->private; mixdev_remove_device(mousedev); - input_unregister_handle(handle); mousedev_destroy(mousedev); } static const struct input_device_id mousedev_ids[] = { { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_KEYBIT | + INPUT_DEVICE_ID_MATCH_RELBIT, .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, .keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) }, .relbit = { BIT(REL_X) | BIT(REL_Y) }, - }, /* A mouse like device, at least one button, two relative axes */ + }, /* A mouse like device, at least one button, + two relative axes */ { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_RELBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_RELBIT, .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, .relbit = { BIT(REL_WHEEL) }, }, /* A separate scrollwheel */ { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_KEYBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, .absbit = { BIT(ABS_X) | BIT(ABS_Y) }, - }, /* A tablet like device, at least touch detection, two absolute axes */ + }, /* A tablet like device, at least touch detection, + two absolute axes */ { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_KEYBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, .keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) }, - .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) | BIT(ABS_TOOL_WIDTH) }, + .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) | + BIT(ABS_TOOL_WIDTH) }, }, /* A touchpad */ { }, /* Terminating entry */ diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index c2eea2767e10..11dafc0ee994 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -385,6 +385,8 @@ static int i8042_enable_kbd_port(void) i8042_ctr |= I8042_CTR_KBDINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { + i8042_ctr &= ~I8042_CTR_KBDINT; + i8042_ctr |= I8042_CTR_KBDDIS; printk(KERN_ERR "i8042.c: Failed to enable KBD port.\n"); return -EIO; } @@ -402,6 +404,8 @@ static int i8042_enable_aux_port(void) i8042_ctr |= I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { + i8042_ctr &= ~I8042_CTR_AUXINT; + i8042_ctr |= I8042_CTR_AUXDIS; printk(KERN_ERR "i8042.c: Failed to enable AUX port.\n"); return -EIO; } diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index f929fcdbae2e..e3e0baa1a158 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -126,6 +126,16 @@ config TOUCHSCREEN_HP600 To compile this driver as a module, choose M here: the module will be called hp680_ts_input. +config TOUCHSCREEN_HP7XX + tristate "HP Jornada 710/720/728 touchscreen" + depends on SA1100_JORNADA720_SSP + help + Say Y here if you have a HP Jornada 710/720/728 and want + to support the built-in touchscreen. + + To compile this driver as a module, choose M here: the + module will be called jornada720_ts. + config TOUCHSCREEN_PENMOUNT tristate "Penmount serial touchscreen" select SERIO @@ -191,6 +201,7 @@ config TOUCHSCREEN_USB_COMPOSITE - Gunze AHL61 - DMC TSC-10/25 - IRTOUCHSYSTEMS/UNITOP + - IdealTEK URTC1000 Have a look at <http://linux.chapter7.ch/touchkit/> for a usage description and the required user-space stuff. @@ -238,4 +249,14 @@ config TOUCHSCREEN_USB_IRTOUCH bool "IRTOUCHSYSTEMS/UNITOP device support" if EMBEDDED depends on TOUCHSCREEN_USB_COMPOSITE +config TOUCHSCREEN_USB_IDEALTEK + default y + bool "IdealTEK URTC1000 device support" if EMBEDDED + depends on TOUCHSCREEN_USB_COMPOSITE + +config TOUCHSCREEN_USB_GENERAL_TOUCH + default y + bool "GeneralTouch Touchscreen device support" if EMBEDDED + depends on TOUCHSCREEN_USB_COMPOSITE + endif diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 5de8933c4993..35d4097df35a 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o obj-$(CONFIG_TOUCHSCREEN_HP600) += hp680_ts_input.o +obj-$(CONFIG_TOUCHSCREEN_HP7XX) += jornada720_ts.o obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE) += usbtouchscreen.o obj-$(CONFIG_TOUCHSCREEN_PENMOUNT) += penmount.o obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT) += touchright.o diff --git a/drivers/input/touchscreen/jornada720_ts.c b/drivers/input/touchscreen/jornada720_ts.c new file mode 100644 index 000000000000..42a1c9a1940e --- /dev/null +++ b/drivers/input/touchscreen/jornada720_ts.c @@ -0,0 +1,182 @@ +/* + * drivers/input/touchscreen/jornada720_ts.c + * + * Copyright (C) 2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com> + * + * Copyright (C) 2006 Filip Zyzniewski <filip.zyzniewski@tefnet.pl> + * based on HP Jornada 56x touchscreen driver by Alex Lange <chicken@handhelds.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * HP Jornada 710/720/729 Touchscreen Driver + */ + +#include <linux/platform_device.h> +#include <linux/init.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/module.h> + +#include <asm/hardware.h> +#include <asm/arch/jornada720.h> + +MODULE_AUTHOR("Kristoffer Ericson <kristoffer.ericson@gmail.com>"); +MODULE_DESCRIPTION("HP Jornada 710/720/728 touchscreen driver"); +MODULE_LICENSE("GPLv2"); + +struct jornada_ts { + struct input_dev *dev; + int x_data[4]; /* X sample values */ + int y_data[4]; /* Y sample values */ +}; + +static void jornada720_ts_collect_data(struct jornada_ts *jornada_ts) +{ + + /* 3 low word X samples */ + jornada_ts->x_data[0] = jornada_ssp_byte(TXDUMMY); + jornada_ts->x_data[1] = jornada_ssp_byte(TXDUMMY); + jornada_ts->x_data[2] = jornada_ssp_byte(TXDUMMY); + + /* 3 low word Y samples */ + jornada_ts->y_data[0] = jornada_ssp_byte(TXDUMMY); + jornada_ts->y_data[1] = jornada_ssp_byte(TXDUMMY); + jornada_ts->y_data[2] = jornada_ssp_byte(TXDUMMY); + + /* combined x samples bits */ + jornada_ts->x_data[3] = jornada_ssp_byte(TXDUMMY); + + /* combined y samples bits */ + jornada_ts->y_data[3] = jornada_ssp_byte(TXDUMMY); +} + +static int jornada720_ts_average(int coords[4]) +{ + int coord, high_bits = coords[3]; + + coord = coords[0] | ((high_bits & 0x03) << 8); + coord += coords[1] | ((high_bits & 0x0c) << 6); + coord += coords[2] | ((high_bits & 0x30) << 4); + + return coord / 3; +} + +static irqreturn_t jornada720_ts_interrupt(int irq, void *dev_id) +{ + struct platform_device *pdev = dev_id; + struct jornada_ts *jornada_ts = platform_get_drvdata(pdev); + struct input_dev *input = jornada_ts->dev; + int x, y; + + /* If GPIO_GPIO9 is set to high then report pen up */ + if (GPLR & GPIO_GPIO(9)) { + input_report_key(input, BTN_TOUCH, 0); + input_sync(input); + } else { + jornada_ssp_start(); + + /* proper reply to request is always TXDUMMY */ + if (jornada_ssp_inout(GETTOUCHSAMPLES) == TXDUMMY) { + jornada720_ts_collect_data(jornada_ts); + + x = jornada720_ts_average(jornada_ts->x_data); + y = jornada720_ts_average(jornada_ts->y_data); + + input_report_key(input, BTN_TOUCH, 1); + input_report_abs(input, ABS_X, x); + input_report_abs(input, ABS_Y, y); + input_sync(input); + } + + jornada_ssp_end(); + } + + return IRQ_HANDLED; +} + +static int __devinit jornada720_ts_probe(struct platform_device *pdev) +{ + struct jornada_ts *jornada_ts; + struct input_dev *input_dev; + int error; + + jornada_ts = kzalloc(sizeof(struct jornada_ts), GFP_KERNEL); + input_dev = input_allocate_device(); + + if (!jornada_ts || !input_dev) { + error = -ENOMEM; + goto fail1; + } + + platform_set_drvdata(pdev, jornada_ts); + + jornada_ts->dev = input_dev; + + input_dev->name = "HP Jornada 7xx Touchscreen"; + input_dev->phys = "jornadats/input0"; + input_dev->id.bustype = BUS_HOST; + input_dev->dev.parent = &pdev->dev; + + input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); + input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); + input_set_abs_params(input_dev, ABS_X, 270, 3900, 0, 0); + input_set_abs_params(input_dev, ABS_Y, 180, 3700, 0, 0); + + error = request_irq(IRQ_GPIO9, + jornada720_ts_interrupt, + IRQF_DISABLED | IRQF_TRIGGER_RISING, + "HP7XX Touchscreen driver", pdev); + if (error) { + printk(KERN_INFO "HP7XX TS : Unable to acquire irq!\n"); + goto fail1; + } + + error = input_register_device(jornada_ts->dev); + if (error) + goto fail2; + + return 0; + + fail2: + free_irq(IRQ_GPIO9, pdev); + fail1: + platform_set_drvdata(pdev, NULL); + input_free_device(input_dev); + kfree(jornada_ts); + return error; +} + +static int __devexit jornada720_ts_remove(struct platform_device *pdev) +{ + struct jornada_ts *jornada_ts = platform_get_drvdata(pdev); + + free_irq(IRQ_GPIO9, pdev); + platform_set_drvdata(pdev, NULL); + input_unregister_device(jornada_ts->dev); + kfree(jornada_ts); + + return 0; +} + +static struct platform_driver jornada720_ts_driver = { + .probe = jornada720_ts_probe, + .remove = __devexit_p(jornada720_ts_remove), + .driver = { + .name = "jornada_ts", + }, +}; + +static int __init jornada720_ts_init(void) +{ + return platform_driver_register(&jornada720_ts_driver); +} + +static void __exit jornada720_ts_exit(void) +{ + platform_driver_unregister(&jornada720_ts_driver); +} + +module_init(jornada720_ts_init); +module_exit(jornada720_ts_exit); diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index 36f944019158..86aed64ec0fb 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -130,8 +130,7 @@ static unsigned int ucb1400_adc_read(struct ucb1400 *ucb, u16 adc_channel) if (val & UCB_ADC_DAT_VALID) break; /* yield to other processes */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + schedule_timeout_uninterruptible(1); } return UCB_ADC_DAT_VALUE(val); diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index 741f6c6f1e50..9fb3d5c30999 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -10,6 +10,7 @@ * - Gunze AHL61 * - DMC TSC-10/25 * - IRTOUCHSYSTEMS/UNITOP + * - IdealTEK URTC1000 * * Copyright (C) 2004-2006 by Daniel Ritz <daniel.ritz@gmx.ch> * Copyright (C) by Todd E. Johnson (mtouchusb.c) @@ -92,7 +93,7 @@ struct usbtouch_usb { }; -#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO) +#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO) || defined(CONFIG_TOUCHSCREEN_USB_IDEALTEK) #define MULTI_PACKET #endif @@ -112,6 +113,8 @@ enum { DEVTYPE_GUNZE, DEVTYPE_DMC_TSC10, DEVTYPE_IRTOUCH, + DEVTYPE_IDEALTEK, + DEVTYPE_GENERAL_TOUCH, }; static struct usb_device_id usbtouch_devices[] = { @@ -157,6 +160,14 @@ static struct usb_device_id usbtouch_devices[] = { {USB_DEVICE(0x6615, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, #endif +#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK + {USB_DEVICE(0x1391, 0x1000), .driver_info = DEVTYPE_IDEALTEK}, +#endif + +#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH + {USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH}, +#endif + {} }; @@ -396,7 +407,8 @@ static int dmc_tsc10_init(struct usbtouch_usb *usbtouch) TSC10_RATE_150, 0, buf, 2, USB_CTRL_SET_TIMEOUT); if (ret < 0) return ret; - if (buf[0] != 0x06 || buf[1] != 0x00) + if ((buf[0] != 0x06 || buf[1] != 0x00) && + (buf[0] != 0x15 || buf[1] != 0x01)) return -ENODEV; /* start sending data */ @@ -438,6 +450,57 @@ static int irtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) /***************************************************************************** + * IdealTEK URTC1000 Part + */ +#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK +static int idealtek_get_pkt_len(unsigned char *buf, int len) +{ + if (buf[0] & 0x80) + return 5; + if (buf[0] == 0x01) + return len; + return 0; +} + +static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt) +{ + switch (pkt[0] & 0x98) { + case 0x88: + /* touch data in IdealTEK mode */ + dev->x = (pkt[1] << 5) | (pkt[2] >> 2); + dev->y = (pkt[3] << 5) | (pkt[4] >> 2); + dev->touch = (pkt[0] & 0x40) ? 1 : 0; + return 1; + + case 0x98: + /* touch data in MT emulation mode */ + dev->x = (pkt[2] << 5) | (pkt[1] >> 2); + dev->y = (pkt[4] << 5) | (pkt[3] >> 2); + dev->touch = (pkt[0] & 0x40) ? 1 : 0; + return 1; + + default: + return 0; + } +} +#endif + +/***************************************************************************** + * General Touch Part + */ +#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH +static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) +{ + dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1] ; + dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3] ; + dev->press = pkt[5] & 0xff; + dev->touch = pkt[0] & 0x01; + + return 1; +} +#endif + +/***************************************************************************** * the different device descriptors */ static struct usbtouch_device_info usbtouch_dev_info[] = { @@ -537,6 +600,32 @@ static struct usbtouch_device_info usbtouch_dev_info[] = { .read_data = irtouch_read_data, }, #endif + +#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK + [DEVTYPE_IDEALTEK] = { + .min_xc = 0x0, + .max_xc = 0x0fff, + .min_yc = 0x0, + .max_yc = 0x0fff, + .rept_size = 8, + .flags = USBTOUCH_FLG_BUFFER, + .process_pkt = usbtouch_process_multi, + .get_pkt_len = idealtek_get_pkt_len, + .read_data = idealtek_read_data, + }, +#endif + +#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH + [DEVTYPE_GENERAL_TOUCH] = { + .min_xc = 0x0, + .max_xc = 0x0500, + .min_yc = 0x0, + .max_yc = 0x0500, + .rept_size = 7, + .read_data = general_touch_read_data, + } +#endif + }; diff --git a/drivers/input/tsdev.c b/drivers/input/tsdev.c deleted file mode 100644 index d2f882e98e5e..000000000000 --- a/drivers/input/tsdev.c +++ /dev/null @@ -1,533 +0,0 @@ -/* - * $Id: tsdev.c,v 1.15 2002/04/10 16:50:19 jsimmons Exp $ - * - * Copyright (c) 2001 "Crazy" james Simmons - * - * Compaq touchscreen protocol driver. The protocol emulated by this driver - * is obsolete; for new programs use the tslib library which can read directly - * from evdev and perform dejittering, variance filtering and calibration - - * all in user space, not at kernel level. The meaning of this driver is - * to allow usage of newer input drivers with old applications that use the - * old /dev/h3600_ts and /dev/h3600_tsraw devices. - * - * 09-Apr-2004: Andrew Zabolotny <zap@homelink.ru> - * Fixed to actually work, not just output random numbers. - * Added support for both h3600_ts and h3600_tsraw protocol - * emulation. - */ - -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Should you need to contact me, the author, you can do so either by - * e-mail - mail your message to <jsimmons@infradead.org>. - */ - -#define TSDEV_MINOR_BASE 128 -#define TSDEV_MINORS 32 -/* First 16 devices are h3600_ts compatible; second 16 are h3600_tsraw */ -#define TSDEV_MINOR_MASK 15 -#define TSDEV_BUFFER_SIZE 64 - -#include <linux/slab.h> -#include <linux/poll.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/input.h> -#include <linux/major.h> -#include <linux/random.h> -#include <linux/time.h> -#include <linux/device.h> - -#ifndef CONFIG_INPUT_TSDEV_SCREEN_X -#define CONFIG_INPUT_TSDEV_SCREEN_X 240 -#endif -#ifndef CONFIG_INPUT_TSDEV_SCREEN_Y -#define CONFIG_INPUT_TSDEV_SCREEN_Y 320 -#endif - -/* This driver emulates both protocols of the old h3600_ts and h3600_tsraw - * devices. The first one must output X/Y data in 'cooked' format, e.g. - * filtered, dejittered and calibrated. Second device just outputs raw - * data received from the hardware. - * - * This driver doesn't support filtering and dejittering; it supports only - * calibration. Filtering and dejittering must be done in the low-level - * driver, if needed, because it may gain additional benefits from knowing - * the low-level details, the nature of noise and so on. - * - * The driver precomputes a calibration matrix given the initial xres and - * yres values (quite innacurate for most touchscreens) that will result - * in a more or less expected range of output values. The driver supports - * the TS_SET_CAL ioctl, which will replace the calibration matrix with a - * new one, supposedly generated from the values taken from the raw device. - */ - -MODULE_AUTHOR("James Simmons <jsimmons@transvirtual.com>"); -MODULE_DESCRIPTION("Input driver to touchscreen converter"); -MODULE_LICENSE("GPL"); - -static int xres = CONFIG_INPUT_TSDEV_SCREEN_X; -module_param(xres, uint, 0); -MODULE_PARM_DESC(xres, "Horizontal screen resolution (can be negative for X-mirror)"); - -static int yres = CONFIG_INPUT_TSDEV_SCREEN_Y; -module_param(yres, uint, 0); -MODULE_PARM_DESC(yres, "Vertical screen resolution (can be negative for Y-mirror)"); - -/* From Compaq's Touch Screen Specification version 0.2 (draft) */ -struct ts_event { - short pressure; - short x; - short y; - short millisecs; -}; - -struct ts_calibration { - int xscale; - int xtrans; - int yscale; - int ytrans; - int xyswap; -}; - -struct tsdev { - int exist; - int open; - int minor; - char name[8]; - struct input_handle handle; - wait_queue_head_t wait; - struct list_head client_list; - struct device dev; - - int x, y, pressure; - struct ts_calibration cal; -}; - -struct tsdev_client { - struct fasync_struct *fasync; - struct list_head node; - struct tsdev *tsdev; - int head, tail; - struct ts_event event[TSDEV_BUFFER_SIZE]; - int raw; -}; - -/* The following ioctl codes are defined ONLY for backward compatibility. - * Don't use tsdev for new developement; use the tslib library instead. - * Touchscreen calibration is a fully userspace task. - */ -/* Use 'f' as magic number */ -#define IOC_H3600_TS_MAGIC 'f' -#define TS_GET_CAL _IOR(IOC_H3600_TS_MAGIC, 10, struct ts_calibration) -#define TS_SET_CAL _IOW(IOC_H3600_TS_MAGIC, 11, struct ts_calibration) - -static struct tsdev *tsdev_table[TSDEV_MINORS/2]; - -static int tsdev_fasync(int fd, struct file *file, int on) -{ - struct tsdev_client *client = file->private_data; - int retval; - - retval = fasync_helper(fd, file, on, &client->fasync); - return retval < 0 ? retval : 0; -} - -static int tsdev_open(struct inode *inode, struct file *file) -{ - int i = iminor(inode) - TSDEV_MINOR_BASE; - struct tsdev_client *client; - struct tsdev *tsdev; - int error; - - printk(KERN_WARNING "tsdev (compaq touchscreen emulation) is scheduled " - "for removal.\nSee Documentation/feature-removal-schedule.txt " - "for details.\n"); - - if (i >= TSDEV_MINORS) - return -ENODEV; - - tsdev = tsdev_table[i & TSDEV_MINOR_MASK]; - if (!tsdev || !tsdev->exist) - return -ENODEV; - - get_device(&tsdev->dev); - - client = kzalloc(sizeof(struct tsdev_client), GFP_KERNEL); - if (!client) { - error = -ENOMEM; - goto err_put_tsdev; - } - - client->tsdev = tsdev; - client->raw = (i >= TSDEV_MINORS / 2) ? 1 : 0; - list_add_tail(&client->node, &tsdev->client_list); - - if (!tsdev->open++ && tsdev->exist) { - error = input_open_device(&tsdev->handle); - if (error) - goto err_free_client; - } - - file->private_data = client; - return 0; - - err_free_client: - list_del(&client->node); - kfree(client); - err_put_tsdev: - put_device(&tsdev->dev); - return error; -} - -static void tsdev_free(struct device *dev) -{ - struct tsdev *tsdev = container_of(dev, struct tsdev, dev); - - tsdev_table[tsdev->minor] = NULL; - kfree(tsdev); -} - -static int tsdev_release(struct inode *inode, struct file *file) -{ - struct tsdev_client *client = file->private_data; - struct tsdev *tsdev = client->tsdev; - - tsdev_fasync(-1, file, 0); - - list_del(&client->node); - kfree(client); - - if (!--tsdev->open && tsdev->exist) - input_close_device(&tsdev->handle); - - put_device(&tsdev->dev); - - return 0; -} - -static ssize_t tsdev_read(struct file *file, char __user *buffer, size_t count, - loff_t *ppos) -{ - struct tsdev_client *client = file->private_data; - struct tsdev *tsdev = client->tsdev; - int retval = 0; - - if (client->head == client->tail && tsdev->exist && (file->f_flags & O_NONBLOCK)) - return -EAGAIN; - - retval = wait_event_interruptible(tsdev->wait, - client->head != client->tail || !tsdev->exist); - if (retval) - return retval; - - if (!tsdev->exist) - return -ENODEV; - - while (client->head != client->tail && - retval + sizeof (struct ts_event) <= count) { - if (copy_to_user (buffer + retval, client->event + client->tail, - sizeof (struct ts_event))) - return -EFAULT; - client->tail = (client->tail + 1) & (TSDEV_BUFFER_SIZE - 1); - retval += sizeof (struct ts_event); - } - - return retval; -} - -/* No kernel lock - fine */ -static unsigned int tsdev_poll(struct file *file, poll_table *wait) -{ - struct tsdev_client *client = file->private_data; - struct tsdev *tsdev = client->tsdev; - - poll_wait(file, &tsdev->wait, wait); - return ((client->head == client->tail) ? 0 : (POLLIN | POLLRDNORM)) | - (tsdev->exist ? 0 : (POLLHUP | POLLERR)); -} - -static int tsdev_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct tsdev_client *client = file->private_data; - struct tsdev *tsdev = client->tsdev; - int retval = 0; - - switch (cmd) { - case TS_GET_CAL: - if (copy_to_user((void __user *)arg, &tsdev->cal, - sizeof (struct ts_calibration))) - retval = -EFAULT; - break; - - case TS_SET_CAL: - if (copy_from_user(&tsdev->cal, (void __user *)arg, - sizeof (struct ts_calibration))) - retval = -EFAULT; - break; - - default: - retval = -EINVAL; - break; - } - - return retval; -} - -static const struct file_operations tsdev_fops = { - .owner = THIS_MODULE, - .open = tsdev_open, - .release = tsdev_release, - .read = tsdev_read, - .poll = tsdev_poll, - .fasync = tsdev_fasync, - .ioctl = tsdev_ioctl, -}; - -static void tsdev_event(struct input_handle *handle, unsigned int type, - unsigned int code, int value) -{ - struct tsdev *tsdev = handle->private; - struct tsdev_client *client; - struct timeval time; - - switch (type) { - case EV_ABS: - switch (code) { - case ABS_X: - tsdev->x = value; - break; - - case ABS_Y: - tsdev->y = value; - break; - - case ABS_PRESSURE: - if (value > handle->dev->absmax[ABS_PRESSURE]) - value = handle->dev->absmax[ABS_PRESSURE]; - value -= handle->dev->absmin[ABS_PRESSURE]; - if (value < 0) - value = 0; - tsdev->pressure = value; - break; - } - break; - - case EV_REL: - switch (code) { - case REL_X: - tsdev->x += value; - if (tsdev->x < 0) - tsdev->x = 0; - else if (tsdev->x > xres) - tsdev->x = xres; - break; - - case REL_Y: - tsdev->y += value; - if (tsdev->y < 0) - tsdev->y = 0; - else if (tsdev->y > yres) - tsdev->y = yres; - break; - } - break; - - case EV_KEY: - if (code == BTN_TOUCH || code == BTN_MOUSE) { - switch (value) { - case 0: - tsdev->pressure = 0; - break; - - case 1: - if (!tsdev->pressure) - tsdev->pressure = 1; - break; - } - } - break; - } - - if (type != EV_SYN || code != SYN_REPORT) - return; - - list_for_each_entry(client, &tsdev->client_list, node) { - int x, y, tmp; - - do_gettimeofday(&time); - client->event[client->head].millisecs = time.tv_usec / 1000; - client->event[client->head].pressure = tsdev->pressure; - - x = tsdev->x; - y = tsdev->y; - - /* Calibration */ - if (!client->raw) { - x = ((x * tsdev->cal.xscale) >> 8) + tsdev->cal.xtrans; - y = ((y * tsdev->cal.yscale) >> 8) + tsdev->cal.ytrans; - if (tsdev->cal.xyswap) { - tmp = x; x = y; y = tmp; - } - } - - client->event[client->head].x = x; - client->event[client->head].y = y; - client->head = (client->head + 1) & (TSDEV_BUFFER_SIZE - 1); - kill_fasync(&client->fasync, SIGIO, POLL_IN); - } - wake_up_interruptible(&tsdev->wait); -} - -static int tsdev_connect(struct input_handler *handler, struct input_dev *dev, - const struct input_device_id *id) -{ - struct tsdev *tsdev; - int minor, delta; - int error; - - for (minor = 0; minor < TSDEV_MINORS / 2 && tsdev_table[minor]; minor++); - if (minor >= TSDEV_MINORS / 2) { - printk(KERN_ERR - "tsdev: You have way too many touchscreens\n"); - return -ENFILE; - } - - tsdev = kzalloc(sizeof(struct tsdev), GFP_KERNEL); - if (!tsdev) - return -ENOMEM; - - INIT_LIST_HEAD(&tsdev->client_list); - init_waitqueue_head(&tsdev->wait); - - tsdev->exist = 1; - tsdev->minor = minor; - tsdev->handle.dev = dev; - tsdev->handle.name = tsdev->name; - tsdev->handle.handler = handler; - tsdev->handle.private = tsdev; - snprintf(tsdev->name, sizeof(tsdev->name), "ts%d", minor); - - /* Precompute the rough calibration matrix */ - delta = dev->absmax [ABS_X] - dev->absmin [ABS_X] + 1; - if (delta == 0) - delta = 1; - tsdev->cal.xscale = (xres << 8) / delta; - tsdev->cal.xtrans = - ((dev->absmin [ABS_X] * tsdev->cal.xscale) >> 8); - - delta = dev->absmax [ABS_Y] - dev->absmin [ABS_Y] + 1; - if (delta == 0) - delta = 1; - tsdev->cal.yscale = (yres << 8) / delta; - tsdev->cal.ytrans = - ((dev->absmin [ABS_Y] * tsdev->cal.yscale) >> 8); - - snprintf(tsdev->dev.bus_id, sizeof(tsdev->dev.bus_id), - "ts%d", minor); - tsdev->dev.class = &input_class; - tsdev->dev.parent = &dev->dev; - tsdev->dev.devt = MKDEV(INPUT_MAJOR, TSDEV_MINOR_BASE + minor); - tsdev->dev.release = tsdev_free; - device_initialize(&tsdev->dev); - - tsdev_table[minor] = tsdev; - - error = device_add(&tsdev->dev); - if (error) - goto err_free_tsdev; - - error = input_register_handle(&tsdev->handle); - if (error) - goto err_delete_tsdev; - - return 0; - - err_delete_tsdev: - device_del(&tsdev->dev); - err_free_tsdev: - put_device(&tsdev->dev); - return error; -} - -static void tsdev_disconnect(struct input_handle *handle) -{ - struct tsdev *tsdev = handle->private; - struct tsdev_client *client; - - input_unregister_handle(handle); - device_del(&tsdev->dev); - - tsdev->exist = 0; - - if (tsdev->open) { - input_close_device(handle); - list_for_each_entry(client, &tsdev->client_list, node) - kill_fasync(&client->fasync, SIGIO, POLL_HUP); - wake_up_interruptible(&tsdev->wait); - } - - put_device(&tsdev->dev); -} - -static const struct input_device_id tsdev_ids[] = { - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT, - .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, - .keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) }, - .relbit = { BIT(REL_X) | BIT(REL_Y) }, - }, /* A mouse like device, at least one button, two relative axes */ - - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, - .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, - .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, - .absbit = { BIT(ABS_X) | BIT(ABS_Y) }, - }, /* A tablet like device, at least touch detection, two absolute axes */ - - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, - .evbit = { BIT(EV_ABS) }, - .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) }, - }, /* A tablet like device with several gradations of pressure */ - - {} /* Terminating entry */ -}; - -MODULE_DEVICE_TABLE(input, tsdev_ids); - -static struct input_handler tsdev_handler = { - .event = tsdev_event, - .connect = tsdev_connect, - .disconnect = tsdev_disconnect, - .fops = &tsdev_fops, - .minor = TSDEV_MINOR_BASE, - .name = "tsdev", - .id_table = tsdev_ids, -}; - -static int __init tsdev_init(void) -{ - return input_register_handler(&tsdev_handler); -} - -static void __exit tsdev_exit(void) -{ - input_unregister_handler(&tsdev_handler); -} - -module_init(tsdev_init); -module_exit(tsdev_exit); diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index 7c9cb7e19f2e..b39d1f5b378e 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -328,7 +328,7 @@ isdn_net_autohup(void) l->cps = (l->transcount * HZ) / (jiffies - last_jiffies); l->transcount = 0; if (dev->net_verbose > 3) - printk(KERN_DEBUG "%s: %d bogocps\n", l->name, l->cps); + printk(KERN_DEBUG "%s: %d bogocps\n", p->dev->name, l->cps); if ((l->flags & ISDN_NET_CONNECTED) && (!l->dialstate)) { anymore = 1; l->huptimer++; @@ -350,12 +350,12 @@ isdn_net_autohup(void) if (l->hupflags & ISDN_CHARGEHUP) { if (l->hupflags & ISDN_WAITCHARGE) { printk(KERN_DEBUG "isdn_net: Hupflags of %s are %X\n", - l->name, l->hupflags); + p->dev->name, l->hupflags); isdn_net_hangup(p->dev); } else if (time_after(jiffies, l->chargetime + l->chargeint)) { printk(KERN_DEBUG "isdn_net: %s: chtime = %lu, chint = %d\n", - l->name, l->chargetime, l->chargeint); + p->dev->name, l->chargetime, l->chargeint); isdn_net_hangup(p->dev); } } else @@ -442,8 +442,8 @@ isdn_net_stat_callback(int idx, isdn_ctrl *c) #endif isdn_net_lp_disconnected(lp); isdn_all_eaz(lp->isdn_device, lp->isdn_channel); - printk(KERN_INFO "%s: remote hangup\n", lp->name); - printk(KERN_INFO "%s: Chargesum is %d\n", lp->name, + printk(KERN_INFO "%s: remote hangup\n", p->dev->name); + printk(KERN_INFO "%s: Chargesum is %d\n", p->dev->name, lp->charge); isdn_net_unbind_channel(lp); return 1; @@ -487,7 +487,7 @@ isdn_net_stat_callback(int idx, isdn_ctrl *c) isdn_net_add_to_bundle(nd, lp); } } - printk(KERN_INFO "isdn_net: %s connected\n", lp->name); + printk(KERN_INFO "isdn_net: %s connected\n", p->dev->name); /* If first Chargeinfo comes before B-Channel connect, * we correct the timestamp here. */ @@ -534,7 +534,7 @@ isdn_net_stat_callback(int idx, isdn_ctrl *c) lp->hupflags |= ISDN_HAVECHARGE; lp->chargetime = jiffies; printk(KERN_DEBUG "isdn_net: Got CINF chargetime of %s now %lu\n", - lp->name, lp->chargetime); + p->dev->name, lp->chargetime); return 1; } } @@ -565,7 +565,7 @@ isdn_net_dial(void) #ifdef ISDN_DEBUG_NET_DIAL if (lp->dialstate) - printk(KERN_DEBUG "%s: dialstate=%d\n", lp->name, lp->dialstate); + printk(KERN_DEBUG "%s: dialstate=%d\n", p->dev->name, lp->dialstate); #endif switch (lp->dialstate) { case 0: @@ -578,7 +578,7 @@ isdn_net_dial(void) lp->dial = lp->phone[1]; if (!lp->dial) { printk(KERN_WARNING "%s: phone number deleted?\n", - lp->name); + p->dev->name); isdn_net_hangup(p->dev); break; } @@ -632,13 +632,13 @@ isdn_net_dial(void) cmd.arg = lp->isdn_channel; if (!lp->dial) { printk(KERN_WARNING "%s: phone number deleted?\n", - lp->name); + p->dev->name); isdn_net_hangup(p->dev); break; } if (!strncmp(lp->dial->num, "LEASED", strlen("LEASED"))) { lp->dialstate = 4; - printk(KERN_INFO "%s: Open leased line ...\n", lp->name); + printk(KERN_INFO "%s: Open leased line ...\n", p->dev->name); } else { if(lp->dialtimeout > 0) if (time_after(jiffies, lp->dialstarted + lp->dialtimeout)) { @@ -688,7 +688,7 @@ isdn_net_dial(void) dev->usage[i] |= ISDN_USAGE_OUTGOING; isdn_info_update(); } - printk(KERN_INFO "%s: dialing %d %s... %s\n", lp->name, + printk(KERN_INFO "%s: dialing %d %s... %s\n", p->dev->name, lp->dialretry, cmd.parm.setup.phone, (cmd.parm.setup.si1 == 1) ? "DOV" : ""); lp->dtimer = 0; @@ -797,7 +797,7 @@ isdn_net_dial(void) */ if (lp->dtimer++ > lp->cbdelay) { - printk(KERN_INFO "%s: hangup waiting for callback ...\n", lp->name); + printk(KERN_INFO "%s: hangup waiting for callback ...\n", p->dev->name); lp->dtimer = 0; lp->dialstate = 4; cmd.driver = lp->isdn_device; @@ -810,7 +810,7 @@ isdn_net_dial(void) break; default: printk(KERN_WARNING "isdn_net: Illegal dialstate %d for device %s\n", - lp->dialstate, lp->name); + lp->dialstate, p->dev->name); } p = (isdn_net_dev *) p->next; } @@ -836,11 +836,11 @@ isdn_net_hangup(struct net_device *d) if (slp->flags & ISDN_NET_CONNECTED) { printk(KERN_INFO "isdn_net: hang up slave %s before %s\n", - slp->name, lp->name); + lp->slave->name, d->name); isdn_net_hangup(lp->slave); } } - printk(KERN_INFO "isdn_net: local hangup %s\n", lp->name); + printk(KERN_INFO "isdn_net: local hangup %s\n", d->name); #ifdef CONFIG_ISDN_PPP if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) isdn_ppp_free(lp); @@ -858,7 +858,7 @@ isdn_net_hangup(struct net_device *d) cmd.command = ISDN_CMD_HANGUP; cmd.arg = lp->isdn_channel; isdn_command(&cmd); - printk(KERN_INFO "%s: Chargesum is %d\n", lp->name, lp->charge); + printk(KERN_INFO "%s: Chargesum is %d\n", d->name, lp->charge); isdn_all_eaz(lp->isdn_device, lp->isdn_channel); } isdn_net_unbind_channel(lp); @@ -885,7 +885,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp) /* fall back to old isdn_net_log_packet method() */ char * buf = skb->data; - printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->name); + printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->netdev->dev->name); p = buf; proto = ETH_P_IP; switch (lp->p_encap) { @@ -1023,7 +1023,7 @@ void isdn_net_writebuf_skb(isdn_net_local *lp, struct sk_buff *skb) ret = isdn_writebuf_skb_stub(lp->isdn_device, lp->isdn_channel, 1, skb); if (ret != len) { /* we should never get here */ - printk(KERN_WARNING "%s: HL driver queue full\n", lp->name); + printk(KERN_WARNING "%s: HL driver queue full\n", lp->netdev->dev->name); goto error; } @@ -1461,7 +1461,7 @@ isdn_ciscohdlck_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) mod_timer(&lp->cisco_timer, expires); printk(KERN_INFO "%s: Keepalive period set " "to %d seconds.\n", - lp->name, lp->cisco_keepalive_period); + dev->name, lp->cisco_keepalive_period); } break; @@ -1512,7 +1512,7 @@ isdn_net_ciscohdlck_slarp_send_keepalive(unsigned long data) lp->cisco_line_state = 0; printk (KERN_WARNING "UPDOWN: Line protocol on Interface %s," - " changed state to down\n", lp->name); + " changed state to down\n", lp->netdev->dev->name); /* should stop routing higher-level data accross */ } else if ((!lp->cisco_line_state) && (myseq_diff >= 0) && (myseq_diff <= 2)) { @@ -1520,14 +1520,14 @@ isdn_net_ciscohdlck_slarp_send_keepalive(unsigned long data) lp->cisco_line_state = 1; printk (KERN_WARNING "UPDOWN: Line protocol on Interface %s," - " changed state to up\n", lp->name); + " changed state to up\n", lp->netdev->dev->name); /* restart routing higher-level data accross */ } if (lp->cisco_debserint) printk (KERN_DEBUG "%s: HDLC " "myseq %lu, mineseen %lu%c, yourseen %lu, %s\n", - lp->name, last_cisco_myseq, lp->cisco_mineseen, + lp->netdev->dev->name, last_cisco_myseq, lp->cisco_mineseen, ((last_cisco_myseq == lp->cisco_mineseen) ? '*' : 040), lp->cisco_yourseq, ((lp->cisco_line_state) ? "line up" : "line down")); @@ -1682,7 +1682,7 @@ isdn_net_ciscohdlck_slarp_in(isdn_net_local *lp, struct sk_buff *skb) "remote ip: %d.%d.%d.%d, " "local ip: %d.%d.%d.%d " "mask: %d.%d.%d.%d\n", - lp->name, + lp->netdev->dev->name, HIPQUAD(addr), HIPQUAD(local), HIPQUAD(mask)); @@ -1690,7 +1690,7 @@ isdn_net_ciscohdlck_slarp_in(isdn_net_local *lp, struct sk_buff *skb) slarp_reply_out: printk(KERN_INFO "%s: got invalid slarp " "reply (%d.%d.%d.%d/%d.%d.%d.%d) " - "- ignored\n", lp->name, + "- ignored\n", lp->netdev->dev->name, HIPQUAD(addr), HIPQUAD(mask)); break; case CISCO_SLARP_KEEPALIVE: @@ -1701,7 +1701,8 @@ isdn_net_ciscohdlck_slarp_in(isdn_net_local *lp, struct sk_buff *skb) lp->cisco_last_slarp_in) { printk(KERN_DEBUG "%s: Keepalive period mismatch - " "is %d but should be %d.\n", - lp->name, period, lp->cisco_keepalive_period); + lp->netdev->dev->name, period, + lp->cisco_keepalive_period); } lp->cisco_last_slarp_in = jiffies; p += get_u32(p, &my_seq); @@ -1732,12 +1733,12 @@ isdn_net_ciscohdlck_receive(isdn_net_local *lp, struct sk_buff *skb) if (addr != CISCO_ADDR_UNICAST && addr != CISCO_ADDR_BROADCAST) { printk(KERN_WARNING "%s: Unknown Cisco addr 0x%02x\n", - lp->name, addr); + lp->netdev->dev->name, addr); goto out_free; } if (ctrl != CISCO_CTRL) { printk(KERN_WARNING "%s: Unknown Cisco ctrl 0x%02x\n", - lp->name, ctrl); + lp->netdev->dev->name, ctrl); goto out_free; } @@ -1748,7 +1749,8 @@ isdn_net_ciscohdlck_receive(isdn_net_local *lp, struct sk_buff *skb) case CISCO_TYPE_CDP: if (lp->cisco_debserint) printk(KERN_DEBUG "%s: Received CDP packet. use " - "\"no cdp enable\" on cisco.\n", lp->name); + "\"no cdp enable\" on cisco.\n", + lp->netdev->dev->name); goto out_free; default: /* no special cisco protocol */ @@ -1843,7 +1845,7 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb) }; #endif /* CONFIG_ISDN_X25 */ printk(KERN_WARNING "%s: unknown encapsulation, dropping\n", - lp->name); + lp->netdev->dev->name); kfree_skb(skb); return; } @@ -2174,7 +2176,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) wret = matchret; #ifdef ISDN_DEBUG_NET_ICALL printk(KERN_DEBUG "n_fi: if='%s', l.msn=%s, l.flags=%d, l.dstate=%d\n", - lp->name, lp->msn, lp->flags, lp->dialstate); + p->dev->name, lp->msn, lp->flags, lp->dialstate); #endif if ((!matchret) && /* EAZ is matching */ (((!(lp->flags & ISDN_NET_CONNECTED)) && /* but not connected */ @@ -2277,7 +2279,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) * */ if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) { printk(KERN_INFO "incoming call, interface %s `stopped' -> rejected\n", - lp->name); + p->dev->name); return 3; } /* @@ -2286,7 +2288,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) */ if (!isdn_net_device_started(p)) { printk(KERN_INFO "%s: incoming call, interface down -> rejected\n", - lp->name); + p->dev->name); return 3; } /* Interface is up, now see if it's a slave. If so, see if @@ -2294,8 +2296,8 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) */ if (lp->master) { isdn_net_local *mlp = (isdn_net_local *) lp->master->priv; - printk(KERN_DEBUG "ICALLslv: %s\n", lp->name); - printk(KERN_DEBUG "master=%s\n", mlp->name); + printk(KERN_DEBUG "ICALLslv: %s\n", p->dev->name); + printk(KERN_DEBUG "master=%s\n", lp->master->name); if (mlp->flags & ISDN_NET_CONNECTED) { printk(KERN_DEBUG "master online\n"); /* Master is online, find parent-slave (master if first slave) */ @@ -2322,11 +2324,11 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) * */ if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) { printk(KERN_INFO "incoming call for callback, interface %s `off' -> rejected\n", - lp->name); + p->dev->name); return 3; } printk(KERN_DEBUG "%s: call from %s -> %s, start callback\n", - lp->name, nr, eaz); + p->dev->name, nr, eaz); if (lp->phone[1]) { /* Grab a free ISDN-Channel */ spin_lock_irqsave(&dev->lock, flags); @@ -2340,7 +2342,8 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) lp->msn) ) < 0) { - printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n", lp->name); + printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n", + p->dev->name); spin_unlock_irqrestore(&dev->lock, flags); return 0; } @@ -2361,11 +2364,12 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) /* Initiate dialing by returning 2 or 4 */ return (lp->flags & ISDN_NET_CBHUP) ? 2 : 4; } else - printk(KERN_WARNING "isdn_net: %s: No phone number\n", lp->name); + printk(KERN_WARNING "isdn_net: %s: No phone number\n", + p->dev->name); return 0; } else { - printk(KERN_DEBUG "%s: call from %s -> %s accepted\n", lp->name, nr, - eaz); + printk(KERN_DEBUG "%s: call from %s -> %s accepted\n", + p->dev->name, nr, eaz); /* if this interface is dialing, it does it probably on a different device, so free this device */ if ((lp->dialstate == 4) || (lp->dialstate == 12)) { @@ -2424,7 +2428,7 @@ isdn_net_findif(char *name) isdn_net_dev *p = dev->netdev; while (p) { - if (!strcmp(p->local->name, name)) + if (!strcmp(p->dev->name, name)) return p; p = (isdn_net_dev *) p->next; } @@ -2453,7 +2457,8 @@ isdn_net_force_dial_lp(isdn_net_local * lp) lp->pre_device, lp->pre_channel, lp->msn)) < 0) { - printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n", lp->name); + printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n", + lp->netdev->dev->name); spin_unlock_irqrestore(&dev->lock, flags); return -EAGAIN; } @@ -2556,7 +2561,7 @@ isdn_net_new(char *name, struct net_device *master) return NULL; } if (name == NULL) - name = " "; + return NULL; if (!(netdev = kzalloc(sizeof(isdn_net_dev), GFP_KERNEL))) { printk(KERN_WARNING "isdn_net: Could not allocate net-device\n"); return NULL; @@ -2568,7 +2573,6 @@ isdn_net_new(char *name, struct net_device *master) return NULL; } netdev->local = netdev->dev->priv; - strcpy(netdev->local->name, netdev->dev->name); netdev->dev->init = isdn_net_init; if (master) { /* Device shall be a slave */ @@ -2673,7 +2677,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) #endif if (isdn_net_device_started(p)) { printk(KERN_WARNING "%s: cannot change encap when if is up\n", - lp->name); + p->dev->name); return -EBUSY; } #ifdef CONFIG_ISDN_X25 @@ -2698,7 +2702,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) case ISDN_NET_ENCAP_SYNCPPP: #ifndef CONFIG_ISDN_PPP printk(KERN_WARNING "%s: SyncPPP support not configured\n", - lp->name); + p->dev->name); return -EINVAL; #else p->dev->type = ARPHRD_PPP; /* change ARP type */ @@ -2709,7 +2713,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) case ISDN_NET_ENCAP_X25IFACE: #ifndef CONFIG_ISDN_X25 printk(KERN_WARNING "%s: isdn-x25 support not configured\n", - p->local->name); + p->dev->name); return -EINVAL; #else p->dev->type = ARPHRD_X25; /* change ARP type */ @@ -2725,7 +2729,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) break; printk(KERN_WARNING "%s: encapsulation protocol %d not supported\n", - p->local->name, cfg->p_encap); + p->dev->name, cfg->p_encap); return -EINVAL; } if (strlen(cfg->drvid)) { @@ -2902,13 +2906,18 @@ isdn_net_getcfg(isdn_net_ioctl_cfg * cfg) cfg->pppbind = lp->pppbind; cfg->dialtimeout = lp->dialtimeout >= 0 ? lp->dialtimeout / HZ : -1; cfg->dialwait = lp->dialwait / HZ; - if (lp->slave) - strcpy(cfg->slave, ((isdn_net_local *) lp->slave->priv)->name); - else + if (lp->slave) { + if (strlen(lp->slave->name) > 8) + strcpy(cfg->slave, "too-long"); + else + strcpy(cfg->slave, lp->slave->name); + } else cfg->slave[0] = '\0'; - if (lp->master) - strcpy(cfg->master, ((isdn_net_local *) lp->master->priv)->name); - else + if (lp->master) { + if (strlen(lp->master->name) > 8) + strcpy(cfg->master, "too-long"); + strcpy(cfg->master, lp->master->name); + } else cfg->master[0] = '\0'; return 0; } @@ -2978,7 +2987,8 @@ isdn_net_getpeer(isdn_net_ioctl_phone *phone, isdn_net_ioctl_phone __user *peer) isdn_net_dev *p = isdn_net_findif(phone->name); int ch, dv, idx; - if (!p) return -ENODEV; + if (!p) + return -ENODEV; /* * Theoretical race: while this executes, the remote number might * become invalid (hang up) or change (new connection), resulting @@ -2987,14 +2997,18 @@ isdn_net_getpeer(isdn_net_ioctl_phone *phone, isdn_net_ioctl_phone __user *peer) */ ch = p->local->isdn_channel; dv = p->local->isdn_device; - if(ch<0 && dv<0) return -ENOTCONN; + if(ch < 0 && dv < 0) + return -ENOTCONN; idx = isdn_dc2minor(dv, ch); - if (idx<0) return -ENODEV; + if (idx <0 ) + return -ENODEV; /* for pre-bound channels, we need this extra check */ - if ( strncmp(dev->num[idx],"???",3) == 0 ) return -ENOTCONN; - strncpy(phone->phone,dev->num[idx],ISDN_MSNLEN); - phone->outgoing=USG_OUTGOING(dev->usage[idx]); - if ( copy_to_user(peer,phone,sizeof(*peer)) ) return -EFAULT; + if (strncmp(dev->num[idx], "???", 3) == 0) + return -ENOTCONN; + strncpy(phone->phone, dev->num[idx], ISDN_MSNLEN); + phone->outgoing = USG_OUTGOING(dev->usage[idx]); + if (copy_to_user(peer, phone, sizeof(*peer))) + return -EFAULT; return 0; } /* @@ -3113,18 +3127,18 @@ isdn_net_realrm(isdn_net_dev * p, isdn_net_dev * q) dev->netdev = p->next; if (p->local->slave) { /* If this interface has a slave, remove it also */ - char *slavename = ((isdn_net_local *) (p->local->slave->priv))->name; + char *slavename = p->local->slave->name; isdn_net_dev *n = dev->netdev; q = NULL; while (n) { - if (!strcmp(n->local->name, slavename)) { + if (!strcmp(n->dev->name, slavename)) { spin_unlock_irqrestore(&dev->lock, flags); isdn_net_realrm(n, q); spin_lock_irqsave(&dev->lock, flags); break; } q = n; - n = (isdn_net_dev *) n->next; + n = (isdn_net_dev *)n->next; } } spin_unlock_irqrestore(&dev->lock, flags); @@ -3152,7 +3166,7 @@ isdn_net_rm(char *name) p = dev->netdev; q = NULL; while (p) { - if (!strcmp(p->local->name, name)) { + if (!strcmp(p->dev->name, name)) { spin_unlock_irqrestore(&dev->lock, flags); return (isdn_net_realrm(p, q)); } diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 0e5e59f84344..9f5fe372f83d 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -190,9 +190,11 @@ isdn_ppp_bind(isdn_net_local * lp) retval = -1; goto out; } - unit = isdn_ppp_if_get_unit(lp->name); /* get unit number from interface name .. ugly! */ + /* get unit number from interface name .. ugly! */ + unit = isdn_ppp_if_get_unit(lp->netdev->dev->name); if (unit < 0) { - printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n", lp->name); + printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n", + lp->netdev->dev->name); retval = -1; goto out; } @@ -507,7 +509,8 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) case PPPIOCGIFNAME: if(!lp) return -EINVAL; - if ((r = set_arg(argp, lp->name, strlen(lp->name)))) + if ((r = set_arg(argp, lp->netdev->dev->name, + strlen(lp->netdev->dev->name)))) return r; break; case PPPIOCGMPFLAGS: /* get configuration flags */ diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 56cd8998fe4b..77f50b63a970 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -172,6 +172,7 @@ config INPUT_ADBHID config MAC_EMUMOUSEBTN bool "Support for mouse button 2+3 emulation" + select INPUT help This provides generic support for emulating the 2nd and 3rd mouse button with keypresses. If you say Y here, the emulation is still diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c index 48d17bf6c927..8cce016b3d09 100644 --- a/drivers/macintosh/adbhid.c +++ b/drivers/macintosh/adbhid.c @@ -52,6 +52,11 @@ MODULE_AUTHOR("Franz Sirl <Franz.Sirl-kernel@lauterbach.com>"); +static int restore_capslock_events; +module_param(restore_capslock_events, int, 0644); +MODULE_PARM_DESC(restore_capslock_events, + "Produce keypress events for capslock on both keyup and keydown."); + #define KEYB_KEYREG 0 /* register # for key up/down data */ #define KEYB_LEDREG 2 /* register # for leds on ADB keyboard */ #define MOUSE_DATAREG 0 /* reg# for movement/button codes from mouse */ @@ -217,6 +222,8 @@ struct adbhid { #define FLAG_FN_KEY_PRESSED 0x00000001 #define FLAG_POWER_FROM_FN 0x00000002 #define FLAG_EMU_FWDEL_DOWN 0x00000004 +#define FLAG_CAPSLOCK_TRANSLATE 0x00000008 +#define FLAG_CAPSLOCK_DOWN 0x00000010 static struct adbhid *adbhid[16]; @@ -272,19 +279,50 @@ adbhid_keyboard_input(unsigned char *data, int nb, int apoll) } static void -adbhid_input_keycode(int id, int keycode, int repeat) +adbhid_input_keycode(int id, int scancode, int repeat) { struct adbhid *ahid = adbhid[id]; - int up_flag, key; - - up_flag = (keycode & 0x80); - keycode &= 0x7f; + int keycode, up_flag; + + keycode = scancode & 0x7f; + up_flag = scancode & 0x80; + + if (restore_capslock_events) { + if (keycode == ADB_KEY_CAPSLOCK && !up_flag) { + /* Key pressed, turning on the CapsLock LED. + * The next 0xff will be interpreted as a release. */ + ahid->flags |= FLAG_CAPSLOCK_TRANSLATE + | FLAG_CAPSLOCK_DOWN; + } else if (scancode == 0xff) { + /* Scancode 0xff usually signifies that the capslock + * key was either pressed or released. */ + if (ahid->flags & FLAG_CAPSLOCK_TRANSLATE) { + keycode = ADB_KEY_CAPSLOCK; + if (ahid->flags & FLAG_CAPSLOCK_DOWN) { + /* Key released */ + up_flag = 1; + ahid->flags &= ~FLAG_CAPSLOCK_DOWN; + } else { + /* Key pressed */ + up_flag = 0; + ahid->flags &= ~FLAG_CAPSLOCK_TRANSLATE; + } + } else { + printk(KERN_INFO "Spurious caps lock event " + "(scancode 0xff)."); + } + } + } switch (keycode) { - case ADB_KEY_CAPSLOCK: /* Generate down/up events for CapsLock everytime. */ - input_report_key(ahid->input, KEY_CAPSLOCK, 1); - input_report_key(ahid->input, KEY_CAPSLOCK, 0); - input_sync(ahid->input); + case ADB_KEY_CAPSLOCK: + if (!restore_capslock_events) { + /* Generate down/up events for CapsLock everytime. */ + input_report_key(ahid->input, KEY_CAPSLOCK, 1); + input_sync(ahid->input); + input_report_key(ahid->input, KEY_CAPSLOCK, 0); + input_sync(ahid->input); + } return; #ifdef CONFIG_PPC_PMAC case ADB_KEY_POWER_OLD: /* Power key on PBook 3400 needs remapping */ @@ -296,7 +334,7 @@ adbhid_input_keycode(int id, int keycode, int repeat) keycode = ADB_KEY_POWER; } break; - case ADB_KEY_POWER: + case ADB_KEY_POWER: /* Fn + Command will produce a bogus "power" keycode */ if (ahid->flags & FLAG_FN_KEY_PRESSED) { keycode = ADB_KEY_CMD; diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c index 216948dd71a5..81e068fa7ac5 100644 --- a/drivers/misc/thinkpad_acpi.c +++ b/drivers/misc/thinkpad_acpi.c @@ -945,15 +945,15 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) KEY_UNKNOWN, /* 0x0C: FN+BACKSPACE */ KEY_UNKNOWN, /* 0x0D: FN+INSERT */ KEY_UNKNOWN, /* 0x0E: FN+DELETE */ - KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */ + KEY_BRIGHTNESSUP, /* 0x0F: FN+HOME (brightness up) */ /* Scan codes 0x10 to 0x1F: Extended ACPI HKEY hot keys */ - KEY_RESERVED, /* 0x10: FN+END (brightness down) */ + KEY_BRIGHTNESSDOWN, /* 0x10: FN+END (brightness down) */ KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */ KEY_UNKNOWN, /* 0x12: FN+PGDOWN */ KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */ - KEY_RESERVED, /* 0x14: VOLUME UP */ - KEY_RESERVED, /* 0x15: VOLUME DOWN */ - KEY_RESERVED, /* 0x16: MUTE */ + KEY_VOLUMEUP, /* 0x14: VOLUME UP */ + KEY_VOLUMEDOWN, /* 0x15: VOLUME DOWN */ + KEY_MUTE, /* 0x16: MUTE */ KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */ /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, @@ -974,9 +974,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */ KEY_UNKNOWN, /* 0x12: FN+PGDOWN */ KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */ - KEY_RESERVED, /* 0x14: VOLUME UP */ - KEY_RESERVED, /* 0x15: VOLUME DOWN */ - KEY_RESERVED, /* 0x16: MUTE */ + KEY_VOLUMEUP, /* 0x14: VOLUME UP */ + KEY_VOLUMEDOWN, /* 0x15: VOLUME DOWN */ + KEY_MUTE, /* 0x16: MUTE */ KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */ /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c index cb933ac475d5..82113295c266 100644 --- a/drivers/mtd/maps/pxa2xx-flash.c +++ b/drivers/mtd/maps/pxa2xx-flash.c @@ -14,20 +14,20 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/platform_device.h> -#include <linux/dma-mapping.h> #include <linux/mtd/mtd.h> #include <linux/mtd/map.h> #include <linux/mtd/partitions.h> #include <asm/io.h> #include <asm/hardware.h> +#include <asm/cacheflush.h> #include <asm/mach/flash.h> static void pxa2xx_map_inval_cache(struct map_info *map, unsigned long from, ssize_t len) { - consistent_sync((char *)map->cached + from, len, DMA_FROM_DEVICE); + flush_ioremap_region(map->phys, map->cached, from, len); } struct pxa2xx_flash_info { diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 9c635a237a9d..8f99a0626616 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1780,6 +1780,15 @@ config SC92031 To compile this driver as a module, choose M here: the module will be called sc92031. This is recommended. +config CPMAC + tristate "TI AR7 CPMAC Ethernet support (EXPERIMENTAL)" + depends on NET_ETHERNET && EXPERIMENTAL && AR7 + select PHYLIB + select FIXED_PHY + select FIXED_MII_100_FDX + help + TI AR7 CPMAC Ethernet support + config NET_POCKET bool "Pocket and portable adapters" depends on PARPORT diff --git a/drivers/net/Makefile b/drivers/net/Makefile index d2e0f35da42e..22f78cbd126b 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -159,6 +159,7 @@ obj-$(CONFIG_8139CP) += 8139cp.o obj-$(CONFIG_8139TOO) += 8139too.o obj-$(CONFIG_ZNET) += znet.o obj-$(CONFIG_LAN_SAA9730) += saa9730.o +obj-$(CONFIG_CPMAC) += cpmac.o obj-$(CONFIG_DEPCA) += depca.o obj-$(CONFIG_EWRK3) += ewrk3.o obj-$(CONFIG_ATP) += atp.o diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index b46c5d8a77bd..185f98e3964c 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -54,13 +54,16 @@ #include <linux/delay.h> #include <linux/crc32.h> #include <linux/phy.h> + +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/irq.h> #include <asm/io.h> #include <asm/processor.h> -#include <asm/mach-au1x00/au1000.h> -#include <asm/cpu.h> +#include <au1000.h> +#include <prom.h> + #include "au1000_eth.h" #ifdef AU1000_ETH_DEBUG @@ -96,11 +99,6 @@ static void mdio_write(struct net_device *, int, int, u16); static void au1000_adjust_link(struct net_device *); static void enable_mac(struct net_device *, int); -// externs -extern int get_ethernet_addr(char *ethernet_addr); -extern void str2eaddr(unsigned char *ea, unsigned char *str); -extern char * prom_getcmdline(void); - /* * Theory of operation * @@ -619,7 +617,6 @@ static struct net_device * au1000_probe(int port_num) struct au1000_private *aup = NULL; struct net_device *dev = NULL; db_dest_t *pDB, *pDBfree; - char *pmac, *argptr; char ethaddr[6]; int irq, i, err; u32 base, macen; @@ -677,21 +674,12 @@ static struct net_device * au1000_probe(int port_num) au_macs[port_num] = aup; if (port_num == 0) { - /* Check the environment variables first */ - if (get_ethernet_addr(ethaddr) == 0) + if (prom_get_ethernet_addr(ethaddr) == 0) memcpy(au1000_mac_addr, ethaddr, sizeof(au1000_mac_addr)); else { - /* Check command line */ - argptr = prom_getcmdline(); - if ((pmac = strstr(argptr, "ethaddr=")) == NULL) - printk(KERN_INFO "%s: No MAC address found\n", - dev->name); + printk(KERN_INFO "%s: No MAC address found\n", + dev->name); /* Use the hard coded MAC addresses */ - else { - str2eaddr(ethaddr, pmac + strlen("ethaddr=")); - memcpy(au1000_mac_addr, ethaddr, - sizeof(au1000_mac_addr)); - } } setup_hw_rings(aup, MAC0_RX_DMA_ADDR, MAC0_TX_DMA_ADDR); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 64bfec32e2a6..db80f243dd37 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -98,6 +98,7 @@ static char *xmit_hash_policy = NULL; static int arp_interval = BOND_LINK_ARP_INTERV; static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; static char *arp_validate = NULL; +static int fail_over_mac = 0; struct bond_params bonding_defaults; module_param(max_bonds, int, 0); @@ -131,6 +132,8 @@ module_param_array(arp_ip_target, charp, NULL, 0); MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); module_param(arp_validate, charp, 0); MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); +module_param(fail_over_mac, int, 0); +MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on."); /*----------------------------- Global variables ----------------------------*/ @@ -1096,7 +1099,21 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) if (new_active) { bond_set_slave_active_flags(new_active); } - bond_send_gratuitous_arp(bond); + + /* when bonding does not set the slave MAC address, the bond MAC + * address is the one of the active slave. + */ + if (new_active && bond->params.fail_over_mac) + memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, + new_active->dev->addr_len); + if (bond->curr_active_slave && + test_bit(__LINK_STATE_LINKWATCH_PENDING, + &bond->curr_active_slave->dev->state)) { + dprintk("delaying gratuitous arp on %s\n", + bond->curr_active_slave->dev->name); + bond->send_grat_arp = 1; + } else + bond_send_gratuitous_arp(bond); } } @@ -1217,7 +1234,8 @@ static int bond_compute_features(struct bonding *bond) struct slave *slave; struct net_device *bond_dev = bond->dev; unsigned long features = bond_dev->features; - unsigned short max_hard_header_len = ETH_HLEN; + unsigned short max_hard_header_len = max((u16)ETH_HLEN, + bond_dev->hard_header_len); int i; features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); @@ -1238,6 +1256,23 @@ static int bond_compute_features(struct bonding *bond) return 0; } + +static void bond_setup_by_slave(struct net_device *bond_dev, + struct net_device *slave_dev) +{ + struct bonding *bond = bond_dev->priv; + + bond_dev->neigh_setup = slave_dev->neigh_setup; + + bond_dev->type = slave_dev->type; + bond_dev->hard_header_len = slave_dev->hard_header_len; + bond_dev->addr_len = slave_dev->addr_len; + + memcpy(bond_dev->broadcast, slave_dev->broadcast, + slave_dev->addr_len); + bond->setup_by_slave = 1; +} + /* enslave device <slave> to bond device <master> */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1258,8 +1293,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) /* bond must be initialized by bond_open() before enslaving */ if (!(bond_dev->flags & IFF_UP)) { - dprintk("Error, master_dev is not up\n"); - return -EPERM; + printk(KERN_WARNING DRV_NAME + " %s: master_dev is not up in bond_enslave\n", + bond_dev->name); } /* already enslaved */ @@ -1312,14 +1348,42 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_undo_flags; } + /* set bonding device ether type by slave - bonding netdevices are + * created with ether_setup, so when the slave type is not ARPHRD_ETHER + * there is a need to override some of the type dependent attribs/funcs. + * + * bond ether type mutual exclusion - don't allow slaves of dissimilar + * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond + */ + if (bond->slave_cnt == 0) { + if (slave_dev->type != ARPHRD_ETHER) + bond_setup_by_slave(bond_dev, slave_dev); + } else if (bond_dev->type != slave_dev->type) { + printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different " + "from other slaves (%d), can not enslave it.\n", + slave_dev->name, + slave_dev->type, bond_dev->type); + res = -EINVAL; + goto err_undo_flags; + } + if (slave_dev->set_mac_address == NULL) { - printk(KERN_ERR DRV_NAME - ": %s: Error: The slave device you specified does " - "not support setting the MAC address. " - "Your kernel likely does not support slave " - "devices.\n", bond_dev->name); - res = -EOPNOTSUPP; - goto err_undo_flags; + if (bond->slave_cnt == 0) { + printk(KERN_WARNING DRV_NAME + ": %s: Warning: The first slave device " + "specified does not support setting the MAC " + "address. Enabling the fail_over_mac option.", + bond_dev->name); + bond->params.fail_over_mac = 1; + } else if (!bond->params.fail_over_mac) { + printk(KERN_ERR DRV_NAME + ": %s: Error: The slave device specified " + "does not support setting the MAC address, " + "but fail_over_mac is not enabled.\n" + , bond_dev->name); + res = -EOPNOTSUPP; + goto err_undo_flags; + } } new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); @@ -1340,16 +1404,18 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) */ memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); - /* - * Set slave to master's mac address. The application already - * set the master's mac address to that of the first slave - */ - memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); - addr.sa_family = slave_dev->type; - res = dev_set_mac_address(slave_dev, &addr); - if (res) { - dprintk("Error %d calling set_mac_address\n", res); - goto err_free; + if (!bond->params.fail_over_mac) { + /* + * Set slave to master's mac address. The application already + * set the master's mac address to that of the first slave + */ + memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); + addr.sa_family = slave_dev->type; + res = dev_set_mac_address(slave_dev, &addr); + if (res) { + dprintk("Error %d calling set_mac_address\n", res); + goto err_free; + } } res = netdev_set_master(slave_dev, bond_dev); @@ -1574,9 +1640,11 @@ err_close: dev_close(slave_dev); err_restore_mac: - memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); - addr.sa_family = slave_dev->type; - dev_set_mac_address(slave_dev, &addr); + if (!bond->params.fail_over_mac) { + memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + dev_set_mac_address(slave_dev, &addr); + } err_free: kfree(new_slave); @@ -1749,10 +1817,12 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) /* close slave before restoring its mac address */ dev_close(slave_dev); - /* restore original ("permanent") mac address */ - memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); - addr.sa_family = slave_dev->type; - dev_set_mac_address(slave_dev, &addr); + if (!bond->params.fail_over_mac) { + /* restore original ("permanent") mac address */ + memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + dev_set_mac_address(slave_dev, &addr); + } slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | IFF_SLAVE_INACTIVE | IFF_BONDING | @@ -1764,6 +1834,35 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) } /* +* Destroy a bonding device. +* Must be under rtnl_lock when this function is called. +*/ +void bond_destroy(struct bonding *bond) +{ + bond_deinit(bond->dev); + bond_destroy_sysfs_entry(bond); + unregister_netdevice(bond->dev); +} + +/* +* First release a slave and than destroy the bond if no more slaves iare left. +* Must be under rtnl_lock when this function is called. +*/ +int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev) +{ + struct bonding *bond = bond_dev->priv; + int ret; + + ret = bond_release(bond_dev, slave_dev); + if ((ret == 0) && (bond->slave_cnt == 0)) { + printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n", + bond_dev->name, bond_dev->name); + bond_destroy(bond); + } + return ret; +} + +/* * This function releases all slaves. */ static int bond_release_all(struct net_device *bond_dev) @@ -1839,10 +1938,12 @@ static int bond_release_all(struct net_device *bond_dev) /* close slave before restoring its mac address */ dev_close(slave_dev); - /* restore original ("permanent") mac address*/ - memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); - addr.sa_family = slave_dev->type; - dev_set_mac_address(slave_dev, &addr); + if (!bond->params.fail_over_mac) { + /* restore original ("permanent") mac address*/ + memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + dev_set_mac_address(slave_dev, &addr); + } slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | IFF_SLAVE_INACTIVE); @@ -2013,6 +2114,17 @@ void bond_mii_monitor(struct net_device *bond_dev) * program could monitor the link itself if needed. */ + if (bond->send_grat_arp) { + if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING, + &bond->curr_active_slave->dev->state)) + dprintk("Needs to send gratuitous arp but not yet\n"); + else { + dprintk("sending delayed gratuitous arp on on %s\n", + bond->curr_active_slave->dev->name); + bond_send_gratuitous_arp(bond); + bond->send_grat_arp = 0; + } + } read_lock(&bond->curr_slave_lock); oldcurrent = bond->curr_active_slave; read_unlock(&bond->curr_slave_lock); @@ -2414,7 +2526,7 @@ static void bond_send_gratuitous_arp(struct bonding *bond) if (bond->master_ip) { bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, - bond->master_ip, 0); + bond->master_ip, 0); } list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { @@ -2951,9 +3063,15 @@ static void bond_info_show_master(struct seq_file *seq) curr = bond->curr_active_slave; read_unlock(&bond->curr_slave_lock); - seq_printf(seq, "Bonding Mode: %s\n", + seq_printf(seq, "Bonding Mode: %s", bond_mode_name(bond->params.mode)); + if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && + bond->params.fail_over_mac) + seq_printf(seq, " (fail_over_mac)"); + + seq_printf(seq, "\n"); + if (bond->params.mode == BOND_MODE_XOR || bond->params.mode == BOND_MODE_8023AD) { seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", @@ -3248,6 +3366,11 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave * ... Or is it this? */ break; + case NETDEV_GOING_DOWN: + dprintk("slave %s is going down\n", slave_dev->name); + if (bond->setup_by_slave) + bond_release_and_destroy(bond_dev, slave_dev); + break; case NETDEV_CHANGEMTU: /* * TODO: Should slaves be allowed to @@ -3880,6 +4003,13 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); + /* + * If fail_over_mac is enabled, do nothing and return success. + * Returning an error causes ifenslave to fail. + */ + if (bond->params.fail_over_mac) + return 0; + if (!is_valid_ether_addr(sa->sa_data)) { return -EADDRNOTAVAIL; } @@ -4217,6 +4347,8 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) bond->current_arp_slave = NULL; bond->primary_slave = NULL; bond->dev = bond_dev; + bond->send_grat_arp = 0; + bond->setup_by_slave = 0; INIT_LIST_HEAD(&bond->vlan_list); /* Initialize the device entry points */ @@ -4265,7 +4397,6 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) #ifdef CONFIG_PROC_FS bond_create_proc_entry(bond); #endif - list_add_tail(&bond->bond_list, &bond_dev_list); return 0; @@ -4599,6 +4730,11 @@ static int bond_check_params(struct bond_params *params) primary = NULL; } + if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP)) + printk(KERN_WARNING DRV_NAME + ": Warning: fail_over_mac only affects " + "active-backup mode.\n"); + /* fill params struct with the proper values */ params->mode = bond_mode; params->xmit_policy = xmit_hashtype; @@ -4610,6 +4746,7 @@ static int bond_check_params(struct bond_params *params) params->use_carrier = use_carrier; params->lacp_fast = lacp_fast; params->primary[0] = 0; + params->fail_over_mac = fail_over_mac; if (primary) { strncpy(params->primary, primary, IFNAMSIZ); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 6f49ca7e9b66..80c0c8c415ed 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -164,9 +164,7 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t printk(KERN_INFO DRV_NAME ": %s is being deleted...\n", bond->dev->name); - bond_deinit(bond->dev); - bond_destroy_sysfs_entry(bond); - unregister_netdevice(bond->dev); + bond_destroy(bond); rtnl_unlock(); goto out; } @@ -260,17 +258,16 @@ static ssize_t bonding_store_slaves(struct device *d, char command[IFNAMSIZ + 1] = { 0, }; char *ifname; int i, res, found, ret = count; + u32 original_mtu; struct slave *slave; struct net_device *dev = NULL; struct bonding *bond = to_bond(d); /* Quick sanity check -- is the bond interface up? */ if (!(bond->dev->flags & IFF_UP)) { - printk(KERN_ERR DRV_NAME - ": %s: Unable to update slaves because interface is down.\n", + printk(KERN_WARNING DRV_NAME + ": %s: doing slave updates when interface is down.\n", bond->dev->name); - ret = -EPERM; - goto out; } /* Note: We can't hold bond->lock here, as bond_create grabs it. */ @@ -327,6 +324,7 @@ static ssize_t bonding_store_slaves(struct device *d, } /* Set the slave's MTU to match the bond */ + original_mtu = dev->mtu; if (dev->mtu != bond->dev->mtu) { if (dev->change_mtu) { res = dev->change_mtu(dev, @@ -341,6 +339,9 @@ static ssize_t bonding_store_slaves(struct device *d, } rtnl_lock(); res = bond_enslave(bond->dev, dev); + bond_for_each_slave(bond, slave, i) + if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0) + slave->original_mtu = original_mtu; rtnl_unlock(); if (res) { ret = res; @@ -353,13 +354,17 @@ static ssize_t bonding_store_slaves(struct device *d, bond_for_each_slave(bond, slave, i) if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { dev = slave->dev; + original_mtu = slave->original_mtu; break; } if (dev) { printk(KERN_INFO DRV_NAME ": %s: Removing slave %s\n", bond->dev->name, dev->name); rtnl_lock(); - res = bond_release(bond->dev, dev); + if (bond->setup_by_slave) + res = bond_release_and_destroy(bond->dev, dev); + else + res = bond_release(bond->dev, dev); rtnl_unlock(); if (res) { ret = res; @@ -367,9 +372,9 @@ static ssize_t bonding_store_slaves(struct device *d, } /* set the slave MTU to the default */ if (dev->change_mtu) { - dev->change_mtu(dev, 1500); + dev->change_mtu(dev, original_mtu); } else { - dev->mtu = 1500; + dev->mtu = original_mtu; } } else { @@ -563,6 +568,54 @@ static ssize_t bonding_store_arp_validate(struct device *d, static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate); /* + * Show and store fail_over_mac. User only allowed to change the + * value when there are no slaves. + */ +static ssize_t bonding_show_fail_over_mac(struct device *d, struct device_attribute *attr, char *buf) +{ + struct bonding *bond = to_bond(d); + + return sprintf(buf, "%d\n", bond->params.fail_over_mac) + 1; +} + +static ssize_t bonding_store_fail_over_mac(struct device *d, struct device_attribute *attr, const char *buf, size_t count) +{ + int new_value; + int ret = count; + struct bonding *bond = to_bond(d); + + if (bond->slave_cnt != 0) { + printk(KERN_ERR DRV_NAME + ": %s: Can't alter fail_over_mac with slaves in bond.\n", + bond->dev->name); + ret = -EPERM; + goto out; + } + + if (sscanf(buf, "%d", &new_value) != 1) { + printk(KERN_ERR DRV_NAME + ": %s: no fail_over_mac value specified.\n", + bond->dev->name); + ret = -EINVAL; + goto out; + } + + if ((new_value == 0) || (new_value == 1)) { + bond->params.fail_over_mac = new_value; + printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %d.\n", + bond->dev->name, new_value); + } else { + printk(KERN_INFO DRV_NAME + ": %s: Ignoring invalid fail_over_mac value %d.\n", + bond->dev->name, new_value); + } +out: + return ret; +} + +static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, bonding_show_fail_over_mac, bonding_store_fail_over_mac); + +/* * Show and set the arp timer interval. There are two tricky bits * here. First, if ARP monitoring is activated, then we must disable * MII monitoring. Second, if the ARP timer isn't running, we must @@ -1383,6 +1436,7 @@ static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL); static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, + &dev_attr_fail_over_mac.attr, &dev_attr_arp_validate.attr, &dev_attr_arp_interval.attr, &dev_attr_arp_ip_target.attr, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 2a6af7d23728..a8bbd563265c 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -22,8 +22,8 @@ #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "3.1.3" -#define DRV_RELDATE "June 13, 2007" +#define DRV_VERSION "3.2.0" +#define DRV_RELDATE "September 13, 2007" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" @@ -128,6 +128,7 @@ struct bond_params { int arp_interval; int arp_validate; int use_carrier; + int fail_over_mac; int updelay; int downdelay; int lacp_fast; @@ -156,6 +157,7 @@ struct slave { s8 link; /* one of BOND_LINK_XXXX */ s8 state; /* one of BOND_STATE_XXXX */ u32 original_flags; + u32 original_mtu; u32 link_failure_count; u16 speed; u8 duplex; @@ -185,6 +187,8 @@ struct bonding { struct timer_list mii_timer; struct timer_list arp_timer; s8 kill_timers; + s8 send_grat_arp; + s8 setup_by_slave; struct net_device_stats stats; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; @@ -292,6 +296,8 @@ static inline void bond_unset_master_alb_flags(struct bonding *bond) struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(char *name, struct bond_params *params, struct bonding **newbond); +void bond_destroy(struct bonding *bond); +int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev); void bond_deinit(struct net_device *bond_dev); int bond_create_sysfs(void); void bond_destroy_sysfs(void); diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 563bf5f6fa2a..7df31b5561cc 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -4443,7 +4443,7 @@ static struct { {REG_MAC_COLL_EXCESS}, {REG_MAC_COLL_LATE} }; -#define CAS_REG_LEN (sizeof(ethtool_register_table)/sizeof(int)) +#define CAS_REG_LEN ARRAY_SIZE(ethtool_register_table) #define CAS_MAX_REGS (sizeof (u32)*CAS_REG_LEN) static void cas_read_regs(struct cas *cp, u8 *ptr, int len) diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c new file mode 100644 index 000000000000..ed53aaab4c02 --- /dev/null +++ b/drivers/net/cpmac.c @@ -0,0 +1,1174 @@ +/* + * Copyright (C) 2006, 2007 Eugene Konev + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/version.h> + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/skbuff.h> +#include <linux/mii.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <asm/gpio.h> + +MODULE_AUTHOR("Eugene Konev <ejka@imfi.kspu.ru>"); +MODULE_DESCRIPTION("TI AR7 ethernet driver (CPMAC)"); +MODULE_LICENSE("GPL"); + +static int debug_level = 8; +static int dumb_switch; + +/* Next 2 are only used in cpmac_probe, so it's pointless to change them */ +module_param(debug_level, int, 0444); +module_param(dumb_switch, int, 0444); + +MODULE_PARM_DESC(debug_level, "Number of NETIF_MSG bits to enable"); +MODULE_PARM_DESC(dumb_switch, "Assume switch is not connected to MDIO bus"); + +#define CPMAC_VERSION "0.5.0" +/* stolen from net/ieee80211.h */ +#ifndef MAC_FMT +#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" +#define MAC_ARG(x) ((u8*)(x))[0], ((u8*)(x))[1], ((u8*)(x))[2], \ + ((u8*)(x))[3], ((u8*)(x))[4], ((u8*)(x))[5] +#endif +/* frame size + 802.1q tag */ +#define CPMAC_SKB_SIZE (ETH_FRAME_LEN + 4) +#define CPMAC_QUEUES 8 + +/* Ethernet registers */ +#define CPMAC_TX_CONTROL 0x0004 +#define CPMAC_TX_TEARDOWN 0x0008 +#define CPMAC_RX_CONTROL 0x0014 +#define CPMAC_RX_TEARDOWN 0x0018 +#define CPMAC_MBP 0x0100 +# define MBP_RXPASSCRC 0x40000000 +# define MBP_RXQOS 0x20000000 +# define MBP_RXNOCHAIN 0x10000000 +# define MBP_RXCMF 0x01000000 +# define MBP_RXSHORT 0x00800000 +# define MBP_RXCEF 0x00400000 +# define MBP_RXPROMISC 0x00200000 +# define MBP_PROMISCCHAN(channel) (((channel) & 0x7) << 16) +# define MBP_RXBCAST 0x00002000 +# define MBP_BCASTCHAN(channel) (((channel) & 0x7) << 8) +# define MBP_RXMCAST 0x00000020 +# define MBP_MCASTCHAN(channel) ((channel) & 0x7) +#define CPMAC_UNICAST_ENABLE 0x0104 +#define CPMAC_UNICAST_CLEAR 0x0108 +#define CPMAC_MAX_LENGTH 0x010c +#define CPMAC_BUFFER_OFFSET 0x0110 +#define CPMAC_MAC_CONTROL 0x0160 +# define MAC_TXPTYPE 0x00000200 +# define MAC_TXPACE 0x00000040 +# define MAC_MII 0x00000020 +# define MAC_TXFLOW 0x00000010 +# define MAC_RXFLOW 0x00000008 +# define MAC_MTEST 0x00000004 +# define MAC_LOOPBACK 0x00000002 +# define MAC_FDX 0x00000001 +#define CPMAC_MAC_STATUS 0x0164 +# define MAC_STATUS_QOS 0x00000004 +# define MAC_STATUS_RXFLOW 0x00000002 +# define MAC_STATUS_TXFLOW 0x00000001 +#define CPMAC_TX_INT_ENABLE 0x0178 +#define CPMAC_TX_INT_CLEAR 0x017c +#define CPMAC_MAC_INT_VECTOR 0x0180 +# define MAC_INT_STATUS 0x00080000 +# define MAC_INT_HOST 0x00040000 +# define MAC_INT_RX 0x00020000 +# define MAC_INT_TX 0x00010000 +#define CPMAC_MAC_EOI_VECTOR 0x0184 +#define CPMAC_RX_INT_ENABLE 0x0198 +#define CPMAC_RX_INT_CLEAR 0x019c +#define CPMAC_MAC_INT_ENABLE 0x01a8 +#define CPMAC_MAC_INT_CLEAR 0x01ac +#define CPMAC_MAC_ADDR_LO(channel) (0x01b0 + (channel) * 4) +#define CPMAC_MAC_ADDR_MID 0x01d0 +#define CPMAC_MAC_ADDR_HI 0x01d4 +#define CPMAC_MAC_HASH_LO 0x01d8 +#define CPMAC_MAC_HASH_HI 0x01dc +#define CPMAC_TX_PTR(channel) (0x0600 + (channel) * 4) +#define CPMAC_RX_PTR(channel) (0x0620 + (channel) * 4) +#define CPMAC_TX_ACK(channel) (0x0640 + (channel) * 4) +#define CPMAC_RX_ACK(channel) (0x0660 + (channel) * 4) +#define CPMAC_REG_END 0x0680 +/* + * Rx/Tx statistics + * TODO: use some of them to fill stats in cpmac_stats() + */ +#define CPMAC_STATS_RX_GOOD 0x0200 +#define CPMAC_STATS_RX_BCAST 0x0204 +#define CPMAC_STATS_RX_MCAST 0x0208 +#define CPMAC_STATS_RX_PAUSE 0x020c +#define CPMAC_STATS_RX_CRC 0x0210 +#define CPMAC_STATS_RX_ALIGN 0x0214 +#define CPMAC_STATS_RX_OVER 0x0218 +#define CPMAC_STATS_RX_JABBER 0x021c +#define CPMAC_STATS_RX_UNDER 0x0220 +#define CPMAC_STATS_RX_FRAG 0x0224 +#define CPMAC_STATS_RX_FILTER 0x0228 +#define CPMAC_STATS_RX_QOSFILTER 0x022c +#define CPMAC_STATS_RX_OCTETS 0x0230 + +#define CPMAC_STATS_TX_GOOD 0x0234 +#define CPMAC_STATS_TX_BCAST 0x0238 +#define CPMAC_STATS_TX_MCAST 0x023c +#define CPMAC_STATS_TX_PAUSE 0x0240 +#define CPMAC_STATS_TX_DEFER 0x0244 +#define CPMAC_STATS_TX_COLLISION 0x0248 +#define CPMAC_STATS_TX_SINGLECOLL 0x024c +#define CPMAC_STATS_TX_MULTICOLL 0x0250 +#define CPMAC_STATS_TX_EXCESSCOLL 0x0254 +#define CPMAC_STATS_TX_LATECOLL 0x0258 +#define CPMAC_STATS_TX_UNDERRUN 0x025c +#define CPMAC_STATS_TX_CARRIERSENSE 0x0260 +#define CPMAC_STATS_TX_OCTETS 0x0264 + +#define cpmac_read(base, reg) (readl((void __iomem *)(base) + (reg))) +#define cpmac_write(base, reg, val) (writel(val, (void __iomem *)(base) + \ + (reg))) + +/* MDIO bus */ +#define CPMAC_MDIO_VERSION 0x0000 +#define CPMAC_MDIO_CONTROL 0x0004 +# define MDIOC_IDLE 0x80000000 +# define MDIOC_ENABLE 0x40000000 +# define MDIOC_PREAMBLE 0x00100000 +# define MDIOC_FAULT 0x00080000 +# define MDIOC_FAULTDETECT 0x00040000 +# define MDIOC_INTTEST 0x00020000 +# define MDIOC_CLKDIV(div) ((div) & 0xff) +#define CPMAC_MDIO_ALIVE 0x0008 +#define CPMAC_MDIO_LINK 0x000c +#define CPMAC_MDIO_ACCESS(channel) (0x0080 + (channel) * 8) +# define MDIO_BUSY 0x80000000 +# define MDIO_WRITE 0x40000000 +# define MDIO_REG(reg) (((reg) & 0x1f) << 21) +# define MDIO_PHY(phy) (((phy) & 0x1f) << 16) +# define MDIO_DATA(data) ((data) & 0xffff) +#define CPMAC_MDIO_PHYSEL(channel) (0x0084 + (channel) * 8) +# define PHYSEL_LINKSEL 0x00000040 +# define PHYSEL_LINKINT 0x00000020 + +struct cpmac_desc { + u32 hw_next; + u32 hw_data; + u16 buflen; + u16 bufflags; + u16 datalen; + u16 dataflags; +#define CPMAC_SOP 0x8000 +#define CPMAC_EOP 0x4000 +#define CPMAC_OWN 0x2000 +#define CPMAC_EOQ 0x1000 + struct sk_buff *skb; + struct cpmac_desc *next; + dma_addr_t mapping; + dma_addr_t data_mapping; +}; + +struct cpmac_priv { + spinlock_t lock; + spinlock_t rx_lock; + struct cpmac_desc *rx_head; + int ring_size; + struct cpmac_desc *desc_ring; + dma_addr_t dma_ring; + void __iomem *regs; + struct mii_bus *mii_bus; + struct phy_device *phy; + char phy_name[BUS_ID_SIZE]; + int oldlink, oldspeed, oldduplex; + u32 msg_enable; + struct net_device *dev; + struct work_struct reset_work; + struct platform_device *pdev; +}; + +static irqreturn_t cpmac_irq(int, void *); +static void cpmac_hw_start(struct net_device *dev); +static void cpmac_hw_stop(struct net_device *dev); +static int cpmac_stop(struct net_device *dev); +static int cpmac_open(struct net_device *dev); + +static void cpmac_dump_regs(struct net_device *dev) +{ + int i; + struct cpmac_priv *priv = netdev_priv(dev); + for (i = 0; i < CPMAC_REG_END; i += 4) { + if (i % 16 == 0) { + if (i) + printk("\n"); + printk(KERN_DEBUG "%s: reg[%p]:", dev->name, + priv->regs + i); + } + printk(" %08x", cpmac_read(priv->regs, i)); + } + printk("\n"); +} + +static void cpmac_dump_desc(struct net_device *dev, struct cpmac_desc *desc) +{ + int i; + printk(KERN_DEBUG "%s: desc[%p]:", dev->name, desc); + for (i = 0; i < sizeof(*desc) / 4; i++) + printk(" %08x", ((u32 *)desc)[i]); + printk("\n"); +} + +static void cpmac_dump_skb(struct net_device *dev, struct sk_buff *skb) +{ + int i; + printk(KERN_DEBUG "%s: skb 0x%p, len=%d\n", dev->name, skb, skb->len); + for (i = 0; i < skb->len; i++) { + if (i % 16 == 0) { + if (i) + printk("\n"); + printk(KERN_DEBUG "%s: data[%p]:", dev->name, + skb->data + i); + } + printk(" %02x", ((u8 *)skb->data)[i]); + } + printk("\n"); +} + +static int cpmac_mdio_read(struct mii_bus *bus, int phy_id, int reg) +{ + u32 val; + + while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY) + cpu_relax(); + cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_REG(reg) | + MDIO_PHY(phy_id)); + while ((val = cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0))) & MDIO_BUSY) + cpu_relax(); + return MDIO_DATA(val); +} + +static int cpmac_mdio_write(struct mii_bus *bus, int phy_id, + int reg, u16 val) +{ + while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY) + cpu_relax(); + cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_WRITE | + MDIO_REG(reg) | MDIO_PHY(phy_id) | MDIO_DATA(val)); + return 0; +} + +static int cpmac_mdio_reset(struct mii_bus *bus) +{ + ar7_device_reset(AR7_RESET_BIT_MDIO); + cpmac_write(bus->priv, CPMAC_MDIO_CONTROL, MDIOC_ENABLE | + MDIOC_CLKDIV(ar7_cpmac_freq() / 2200000 - 1)); + return 0; +} + +static int mii_irqs[PHY_MAX_ADDR] = { PHY_POLL, }; + +static struct mii_bus cpmac_mii = { + .name = "cpmac-mii", + .read = cpmac_mdio_read, + .write = cpmac_mdio_write, + .reset = cpmac_mdio_reset, + .irq = mii_irqs, +}; + +static int cpmac_config(struct net_device *dev, struct ifmap *map) +{ + if (dev->flags & IFF_UP) + return -EBUSY; + + /* Don't allow changing the I/O address */ + if (map->base_addr != dev->base_addr) + return -EOPNOTSUPP; + + /* ignore other fields */ + return 0; +} + +static void cpmac_set_multicast_list(struct net_device *dev) +{ + struct dev_mc_list *iter; + int i; + u8 tmp; + u32 mbp, bit, hash[2] = { 0, }; + struct cpmac_priv *priv = netdev_priv(dev); + + mbp = cpmac_read(priv->regs, CPMAC_MBP); + if (dev->flags & IFF_PROMISC) { + cpmac_write(priv->regs, CPMAC_MBP, (mbp & ~MBP_PROMISCCHAN(0)) | + MBP_RXPROMISC); + } else { + cpmac_write(priv->regs, CPMAC_MBP, mbp & ~MBP_RXPROMISC); + if (dev->flags & IFF_ALLMULTI) { + /* enable all multicast mode */ + cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, 0xffffffff); + cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, 0xffffffff); + } else { + /* + * cpmac uses some strange mac address hashing + * (not crc32) + */ + for (i = 0, iter = dev->mc_list; i < dev->mc_count; + i++, iter = iter->next) { + bit = 0; + tmp = iter->dmi_addr[0]; + bit ^= (tmp >> 2) ^ (tmp << 4); + tmp = iter->dmi_addr[1]; + bit ^= (tmp >> 4) ^ (tmp << 2); + tmp = iter->dmi_addr[2]; + bit ^= (tmp >> 6) ^ tmp; + tmp = iter->dmi_addr[3]; + bit ^= (tmp >> 2) ^ (tmp << 4); + tmp = iter->dmi_addr[4]; + bit ^= (tmp >> 4) ^ (tmp << 2); + tmp = iter->dmi_addr[5]; + bit ^= (tmp >> 6) ^ tmp; + bit &= 0x3f; + hash[bit / 32] |= 1 << (bit % 32); + } + + cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, hash[0]); + cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, hash[1]); + } + } +} + +static struct sk_buff *cpmac_rx_one(struct net_device *dev, + struct cpmac_priv *priv, + struct cpmac_desc *desc) +{ + struct sk_buff *skb, *result = NULL; + + if (unlikely(netif_msg_hw(priv))) + cpmac_dump_desc(dev, desc); + cpmac_write(priv->regs, CPMAC_RX_ACK(0), (u32)desc->mapping); + if (unlikely(!desc->datalen)) { + if (netif_msg_rx_err(priv) && net_ratelimit()) + printk(KERN_WARNING "%s: rx: spurious interrupt\n", + dev->name); + return NULL; + } + + skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE); + if (likely(skb)) { + skb_reserve(skb, 2); + skb_put(desc->skb, desc->datalen); + desc->skb->protocol = eth_type_trans(desc->skb, dev); + desc->skb->ip_summed = CHECKSUM_NONE; + dev->stats.rx_packets++; + dev->stats.rx_bytes += desc->datalen; + result = desc->skb; + dma_unmap_single(&dev->dev, desc->data_mapping, CPMAC_SKB_SIZE, + DMA_FROM_DEVICE); + desc->skb = skb; + desc->data_mapping = dma_map_single(&dev->dev, skb->data, + CPMAC_SKB_SIZE, + DMA_FROM_DEVICE); + desc->hw_data = (u32)desc->data_mapping; + if (unlikely(netif_msg_pktdata(priv))) { + printk(KERN_DEBUG "%s: received packet:\n", dev->name); + cpmac_dump_skb(dev, result); + } + } else { + if (netif_msg_rx_err(priv) && net_ratelimit()) + printk(KERN_WARNING + "%s: low on skbs, dropping packet\n", dev->name); + dev->stats.rx_dropped++; + } + + desc->buflen = CPMAC_SKB_SIZE; + desc->dataflags = CPMAC_OWN; + + return result; +} + +static int cpmac_poll(struct net_device *dev, int *budget) +{ + struct sk_buff *skb; + struct cpmac_desc *desc; + int received = 0, quota = min(dev->quota, *budget); + struct cpmac_priv *priv = netdev_priv(dev); + + spin_lock(&priv->rx_lock); + if (unlikely(!priv->rx_head)) { + if (netif_msg_rx_err(priv) && net_ratelimit()) + printk(KERN_WARNING "%s: rx: polling, but no queue\n", + dev->name); + netif_rx_complete(dev); + return 0; + } + + desc = priv->rx_head; + while ((received < quota) && ((desc->dataflags & CPMAC_OWN) == 0)) { + skb = cpmac_rx_one(dev, priv, desc); + if (likely(skb)) { + netif_receive_skb(skb); + received++; + } + desc = desc->next; + } + + priv->rx_head = desc; + spin_unlock(&priv->rx_lock); + *budget -= received; + dev->quota -= received; + if (unlikely(netif_msg_rx_status(priv))) + printk(KERN_DEBUG "%s: poll processed %d packets\n", dev->name, + received); + if (desc->dataflags & CPMAC_OWN) { + netif_rx_complete(dev); + cpmac_write(priv->regs, CPMAC_RX_PTR(0), (u32)desc->mapping); + cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1); + return 0; + } + + return 1; +} + +static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int queue, len; + struct cpmac_desc *desc; + struct cpmac_priv *priv = netdev_priv(dev); + + if (unlikely(skb_padto(skb, ETH_ZLEN))) { + if (netif_msg_tx_err(priv) && net_ratelimit()) + printk(KERN_WARNING + "%s: tx: padding failed, dropping\n", dev->name); + spin_lock(&priv->lock); + dev->stats.tx_dropped++; + spin_unlock(&priv->lock); + return -ENOMEM; + } + + len = max(skb->len, ETH_ZLEN); + queue = skb->queue_mapping; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_stop_subqueue(dev, queue); +#else + netif_stop_queue(dev); +#endif + + desc = &priv->desc_ring[queue]; + if (unlikely(desc->dataflags & CPMAC_OWN)) { + if (netif_msg_tx_err(priv) && net_ratelimit()) + printk(KERN_WARNING "%s: tx dma ring full, dropping\n", + dev->name); + spin_lock(&priv->lock); + dev->stats.tx_dropped++; + spin_unlock(&priv->lock); + dev_kfree_skb_any(skb); + return -ENOMEM; + } + + spin_lock(&priv->lock); + dev->trans_start = jiffies; + spin_unlock(&priv->lock); + desc->dataflags = CPMAC_SOP | CPMAC_EOP | CPMAC_OWN; + desc->skb = skb; + desc->data_mapping = dma_map_single(&dev->dev, skb->data, len, + DMA_TO_DEVICE); + desc->hw_data = (u32)desc->data_mapping; + desc->datalen = len; + desc->buflen = len; + if (unlikely(netif_msg_tx_queued(priv))) + printk(KERN_DEBUG "%s: sending 0x%p, len=%d\n", dev->name, skb, + skb->len); + if (unlikely(netif_msg_hw(priv))) + cpmac_dump_desc(dev, desc); + if (unlikely(netif_msg_pktdata(priv))) + cpmac_dump_skb(dev, skb); + cpmac_write(priv->regs, CPMAC_TX_PTR(queue), (u32)desc->mapping); + + return 0; +} + +static void cpmac_end_xmit(struct net_device *dev, int queue) +{ + struct cpmac_desc *desc; + struct cpmac_priv *priv = netdev_priv(dev); + + desc = &priv->desc_ring[queue]; + cpmac_write(priv->regs, CPMAC_TX_ACK(queue), (u32)desc->mapping); + if (likely(desc->skb)) { + spin_lock(&priv->lock); + dev->stats.tx_packets++; + dev->stats.tx_bytes += desc->skb->len; + spin_unlock(&priv->lock); + dma_unmap_single(&dev->dev, desc->data_mapping, desc->skb->len, + DMA_TO_DEVICE); + + if (unlikely(netif_msg_tx_done(priv))) + printk(KERN_DEBUG "%s: sent 0x%p, len=%d\n", dev->name, + desc->skb, desc->skb->len); + + dev_kfree_skb_irq(desc->skb); + desc->skb = NULL; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + if (netif_subqueue_stopped(dev, queue)) + netif_wake_subqueue(dev, queue); +#else + if (netif_queue_stopped(dev)) + netif_wake_queue(dev); +#endif + } else { + if (netif_msg_tx_err(priv) && net_ratelimit()) + printk(KERN_WARNING + "%s: end_xmit: spurious interrupt\n", dev->name); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + if (netif_subqueue_stopped(dev, queue)) + netif_wake_subqueue(dev, queue); +#else + if (netif_queue_stopped(dev)) + netif_wake_queue(dev); +#endif + } +} + +static void cpmac_hw_stop(struct net_device *dev) +{ + int i; + struct cpmac_priv *priv = netdev_priv(dev); + struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data; + + ar7_device_reset(pdata->reset_bit); + cpmac_write(priv->regs, CPMAC_RX_CONTROL, + cpmac_read(priv->regs, CPMAC_RX_CONTROL) & ~1); + cpmac_write(priv->regs, CPMAC_TX_CONTROL, + cpmac_read(priv->regs, CPMAC_TX_CONTROL) & ~1); + for (i = 0; i < 8; i++) { + cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0); + cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0); + } + cpmac_write(priv->regs, CPMAC_UNICAST_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_MAC_CONTROL, + cpmac_read(priv->regs, CPMAC_MAC_CONTROL) & ~MAC_MII); +} + +static void cpmac_hw_start(struct net_device *dev) +{ + int i; + struct cpmac_priv *priv = netdev_priv(dev); + struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data; + + ar7_device_reset(pdata->reset_bit); + for (i = 0; i < 8; i++) { + cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0); + cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0); + } + cpmac_write(priv->regs, CPMAC_RX_PTR(0), priv->rx_head->mapping); + + cpmac_write(priv->regs, CPMAC_MBP, MBP_RXSHORT | MBP_RXBCAST | + MBP_RXMCAST); + cpmac_write(priv->regs, CPMAC_BUFFER_OFFSET, 0); + for (i = 0; i < 8; i++) + cpmac_write(priv->regs, CPMAC_MAC_ADDR_LO(i), dev->dev_addr[5]); + cpmac_write(priv->regs, CPMAC_MAC_ADDR_MID, dev->dev_addr[4]); + cpmac_write(priv->regs, CPMAC_MAC_ADDR_HI, dev->dev_addr[0] | + (dev->dev_addr[1] << 8) | (dev->dev_addr[2] << 16) | + (dev->dev_addr[3] << 24)); + cpmac_write(priv->regs, CPMAC_MAX_LENGTH, CPMAC_SKB_SIZE); + cpmac_write(priv->regs, CPMAC_UNICAST_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff); + cpmac_write(priv->regs, CPMAC_UNICAST_ENABLE, 1); + cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1); + cpmac_write(priv->regs, CPMAC_TX_INT_ENABLE, 0xff); + cpmac_write(priv->regs, CPMAC_MAC_INT_ENABLE, 3); + + cpmac_write(priv->regs, CPMAC_RX_CONTROL, + cpmac_read(priv->regs, CPMAC_RX_CONTROL) | 1); + cpmac_write(priv->regs, CPMAC_TX_CONTROL, + cpmac_read(priv->regs, CPMAC_TX_CONTROL) | 1); + cpmac_write(priv->regs, CPMAC_MAC_CONTROL, + cpmac_read(priv->regs, CPMAC_MAC_CONTROL) | MAC_MII | + MAC_FDX); +} + +static void cpmac_clear_rx(struct net_device *dev) +{ + struct cpmac_priv *priv = netdev_priv(dev); + struct cpmac_desc *desc; + int i; + if (unlikely(!priv->rx_head)) + return; + desc = priv->rx_head; + for (i = 0; i < priv->ring_size; i++) { + if ((desc->dataflags & CPMAC_OWN) == 0) { + if (netif_msg_rx_err(priv) && net_ratelimit()) + printk(KERN_WARNING "%s: packet dropped\n", + dev->name); + if (unlikely(netif_msg_hw(priv))) + cpmac_dump_desc(dev, desc); + desc->dataflags = CPMAC_OWN; + dev->stats.rx_dropped++; + } + desc = desc->next; + } +} + +static void cpmac_clear_tx(struct net_device *dev) +{ + struct cpmac_priv *priv = netdev_priv(dev); + int i; + if (unlikely(!priv->desc_ring)) + return; + for (i = 0; i < CPMAC_QUEUES; i++) + if (priv->desc_ring[i].skb) { + dev_kfree_skb_any(priv->desc_ring[i].skb); + if (netif_subqueue_stopped(dev, i)) + netif_wake_subqueue(dev, i); + } +} + +static void cpmac_hw_error(struct work_struct *work) +{ + struct cpmac_priv *priv = + container_of(work, struct cpmac_priv, reset_work); + + spin_lock(&priv->rx_lock); + cpmac_clear_rx(priv->dev); + spin_unlock(&priv->rx_lock); + cpmac_clear_tx(priv->dev); + cpmac_hw_start(priv->dev); + netif_start_queue(priv->dev); +} + +static irqreturn_t cpmac_irq(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct cpmac_priv *priv; + int queue; + u32 status; + + if (!dev) + return IRQ_NONE; + + priv = netdev_priv(dev); + + status = cpmac_read(priv->regs, CPMAC_MAC_INT_VECTOR); + + if (unlikely(netif_msg_intr(priv))) + printk(KERN_DEBUG "%s: interrupt status: 0x%08x\n", dev->name, + status); + + if (status & MAC_INT_TX) + cpmac_end_xmit(dev, (status & 7)); + + if (status & MAC_INT_RX) { + queue = (status >> 8) & 7; + netif_rx_schedule(dev); + cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 1 << queue); + } + + cpmac_write(priv->regs, CPMAC_MAC_EOI_VECTOR, 0); + + if (unlikely(status & (MAC_INT_HOST | MAC_INT_STATUS))) { + if (netif_msg_drv(priv) && net_ratelimit()) + printk(KERN_ERR "%s: hw error, resetting...\n", + dev->name); + netif_stop_queue(dev); + cpmac_hw_stop(dev); + schedule_work(&priv->reset_work); + if (unlikely(netif_msg_hw(priv))) + cpmac_dump_regs(dev); + } + + return IRQ_HANDLED; +} + +static void cpmac_tx_timeout(struct net_device *dev) +{ + struct cpmac_priv *priv = netdev_priv(dev); + int i; + + spin_lock(&priv->lock); + dev->stats.tx_errors++; + spin_unlock(&priv->lock); + if (netif_msg_tx_err(priv) && net_ratelimit()) + printk(KERN_WARNING "%s: transmit timeout\n", dev->name); + /* + * FIXME: waking up random queue is not the best thing to + * do... on the other hand why we got here at all? + */ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < CPMAC_QUEUES; i++) + if (priv->desc_ring[i].skb) { + dev_kfree_skb_any(priv->desc_ring[i].skb); + netif_wake_subqueue(dev, i); + break; + } +#else + if (priv->desc_ring[0].skb) + dev_kfree_skb_any(priv->desc_ring[0].skb); + netif_wake_queue(dev); +#endif +} + +static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct cpmac_priv *priv = netdev_priv(dev); + if (!(netif_running(dev))) + return -EINVAL; + if (!priv->phy) + return -EINVAL; + if ((cmd == SIOCGMIIPHY) || (cmd == SIOCGMIIREG) || + (cmd == SIOCSMIIREG)) + return phy_mii_ioctl(priv->phy, if_mii(ifr), cmd); + + return -EOPNOTSUPP; +} + +static int cpmac_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct cpmac_priv *priv = netdev_priv(dev); + + if (priv->phy) + return phy_ethtool_gset(priv->phy, cmd); + + return -EINVAL; +} + +static int cpmac_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct cpmac_priv *priv = netdev_priv(dev); + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (priv->phy) + return phy_ethtool_sset(priv->phy, cmd); + + return -EINVAL; +} + +static void cpmac_get_ringparam(struct net_device *dev, struct ethtool_ringparam* ring) +{ + struct cpmac_priv *priv = netdev_priv(dev); + + ring->rx_max_pending = 1024; + ring->rx_mini_max_pending = 1; + ring->rx_jumbo_max_pending = 1; + ring->tx_max_pending = 1; + + ring->rx_pending = priv->ring_size; + ring->rx_mini_pending = 1; + ring->rx_jumbo_pending = 1; + ring->tx_pending = 1; +} + +static int cpmac_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ring) +{ + struct cpmac_priv *priv = netdev_priv(dev); + + if (dev->flags && IFF_UP) + return -EBUSY; + priv->ring_size = ring->rx_pending; + return 0; +} + +static void cpmac_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strcpy(info->driver, "cpmac"); + strcpy(info->version, CPMAC_VERSION); + info->fw_version[0] = '\0'; + sprintf(info->bus_info, "%s", "cpmac"); + info->regdump_len = 0; +} + +static const struct ethtool_ops cpmac_ethtool_ops = { + .get_settings = cpmac_get_settings, + .set_settings = cpmac_set_settings, + .get_drvinfo = cpmac_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ringparam = cpmac_get_ringparam, + .set_ringparam = cpmac_set_ringparam, +}; + +static void cpmac_adjust_link(struct net_device *dev) +{ + struct cpmac_priv *priv = netdev_priv(dev); + int new_state = 0; + + spin_lock(&priv->lock); + if (priv->phy->link) { + netif_start_queue(dev); + if (priv->phy->duplex != priv->oldduplex) { + new_state = 1; + priv->oldduplex = priv->phy->duplex; + } + + if (priv->phy->speed != priv->oldspeed) { + new_state = 1; + priv->oldspeed = priv->phy->speed; + } + + if (!priv->oldlink) { + new_state = 1; + priv->oldlink = 1; + netif_schedule(dev); + } + } else if (priv->oldlink) { + netif_stop_queue(dev); + new_state = 1; + priv->oldlink = 0; + priv->oldspeed = 0; + priv->oldduplex = -1; + } + + if (new_state && netif_msg_link(priv) && net_ratelimit()) + phy_print_status(priv->phy); + + spin_unlock(&priv->lock); +} + +static int cpmac_open(struct net_device *dev) +{ + int i, size, res; + struct cpmac_priv *priv = netdev_priv(dev); + struct resource *mem; + struct cpmac_desc *desc; + struct sk_buff *skb; + + priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link, + 0, PHY_INTERFACE_MODE_MII); + if (IS_ERR(priv->phy)) { + if (netif_msg_drv(priv)) + printk(KERN_ERR "%s: Could not attach to PHY\n", + dev->name); + return PTR_ERR(priv->phy); + } + + mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs"); + if (!request_mem_region(mem->start, mem->end - mem->start, dev->name)) { + if (netif_msg_drv(priv)) + printk(KERN_ERR "%s: failed to request registers\n", + dev->name); + res = -ENXIO; + goto fail_reserve; + } + + priv->regs = ioremap(mem->start, mem->end - mem->start); + if (!priv->regs) { + if (netif_msg_drv(priv)) + printk(KERN_ERR "%s: failed to remap registers\n", + dev->name); + res = -ENXIO; + goto fail_remap; + } + + size = priv->ring_size + CPMAC_QUEUES; + priv->desc_ring = dma_alloc_coherent(&dev->dev, + sizeof(struct cpmac_desc) * size, + &priv->dma_ring, + GFP_KERNEL); + if (!priv->desc_ring) { + res = -ENOMEM; + goto fail_alloc; + } + + for (i = 0; i < size; i++) + priv->desc_ring[i].mapping = priv->dma_ring + sizeof(*desc) * i; + + priv->rx_head = &priv->desc_ring[CPMAC_QUEUES]; + for (i = 0, desc = priv->rx_head; i < priv->ring_size; i++, desc++) { + skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE); + if (unlikely(!skb)) { + res = -ENOMEM; + goto fail_desc; + } + skb_reserve(skb, 2); + desc->skb = skb; + desc->data_mapping = dma_map_single(&dev->dev, skb->data, + CPMAC_SKB_SIZE, + DMA_FROM_DEVICE); + desc->hw_data = (u32)desc->data_mapping; + desc->buflen = CPMAC_SKB_SIZE; + desc->dataflags = CPMAC_OWN; + desc->next = &priv->rx_head[(i + 1) % priv->ring_size]; + desc->hw_next = (u32)desc->next->mapping; + } + + if ((res = request_irq(dev->irq, cpmac_irq, IRQF_SHARED, + dev->name, dev))) { + if (netif_msg_drv(priv)) + printk(KERN_ERR "%s: failed to obtain irq\n", + dev->name); + goto fail_irq; + } + + INIT_WORK(&priv->reset_work, cpmac_hw_error); + cpmac_hw_start(dev); + + priv->phy->state = PHY_CHANGELINK; + phy_start(priv->phy); + + return 0; + +fail_irq: +fail_desc: + for (i = 0; i < priv->ring_size; i++) { + if (priv->rx_head[i].skb) { + dma_unmap_single(&dev->dev, + priv->rx_head[i].data_mapping, + CPMAC_SKB_SIZE, + DMA_FROM_DEVICE); + kfree_skb(priv->rx_head[i].skb); + } + } +fail_alloc: + kfree(priv->desc_ring); + iounmap(priv->regs); + +fail_remap: + release_mem_region(mem->start, mem->end - mem->start); + +fail_reserve: + phy_disconnect(priv->phy); + + return res; +} + +static int cpmac_stop(struct net_device *dev) +{ + int i; + struct cpmac_priv *priv = netdev_priv(dev); + struct resource *mem; + + netif_stop_queue(dev); + + cancel_work_sync(&priv->reset_work); + phy_stop(priv->phy); + phy_disconnect(priv->phy); + priv->phy = NULL; + + cpmac_hw_stop(dev); + + for (i = 0; i < 8; i++) + cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0); + cpmac_write(priv->regs, CPMAC_RX_PTR(0), 0); + cpmac_write(priv->regs, CPMAC_MBP, 0); + + free_irq(dev->irq, dev); + iounmap(priv->regs); + mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs"); + release_mem_region(mem->start, mem->end - mem->start); + priv->rx_head = &priv->desc_ring[CPMAC_QUEUES]; + for (i = 0; i < priv->ring_size; i++) { + if (priv->rx_head[i].skb) { + dma_unmap_single(&dev->dev, + priv->rx_head[i].data_mapping, + CPMAC_SKB_SIZE, + DMA_FROM_DEVICE); + kfree_skb(priv->rx_head[i].skb); + } + } + + dma_free_coherent(&dev->dev, sizeof(struct cpmac_desc) * + (CPMAC_QUEUES + priv->ring_size), + priv->desc_ring, priv->dma_ring); + return 0; +} + +static int external_switch; + +static int __devinit cpmac_probe(struct platform_device *pdev) +{ + int rc, phy_id; + struct resource *mem; + struct cpmac_priv *priv; + struct net_device *dev; + struct plat_cpmac_data *pdata; + + pdata = pdev->dev.platform_data; + + for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) { + if (!(pdata->phy_mask & (1 << phy_id))) + continue; + if (!cpmac_mii.phy_map[phy_id]) + continue; + break; + } + + if (phy_id == PHY_MAX_ADDR) { + if (external_switch || dumb_switch) + phy_id = 0; + else { + printk(KERN_ERR "cpmac: no PHY present\n"); + return -ENODEV; + } + } + + dev = alloc_etherdev_mq(sizeof(*priv), CPMAC_QUEUES); + + if (!dev) { + printk(KERN_ERR "cpmac: Unable to allocate net_device\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, dev); + priv = netdev_priv(dev); + + priv->pdev = pdev; + mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); + if (!mem) { + rc = -ENODEV; + goto fail; + } + + dev->irq = platform_get_irq_byname(pdev, "irq"); + + dev->open = cpmac_open; + dev->stop = cpmac_stop; + dev->set_config = cpmac_config; + dev->hard_start_xmit = cpmac_start_xmit; + dev->do_ioctl = cpmac_ioctl; + dev->set_multicast_list = cpmac_set_multicast_list; + dev->tx_timeout = cpmac_tx_timeout; + dev->ethtool_ops = &cpmac_ethtool_ops; + dev->poll = cpmac_poll; + dev->weight = 64; + dev->features |= NETIF_F_MULTI_QUEUE; + + spin_lock_init(&priv->lock); + spin_lock_init(&priv->rx_lock); + priv->dev = dev; + priv->ring_size = 64; + priv->msg_enable = netif_msg_init(debug_level, 0xff); + memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr)); + if (phy_id == 31) { + snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT, + cpmac_mii.id, phy_id); + } else + snprintf(priv->phy_name, BUS_ID_SIZE, "fixed@%d:%d", 100, 1); + + if ((rc = register_netdev(dev))) { + printk(KERN_ERR "cpmac: error %i registering device %s\n", rc, + dev->name); + goto fail; + } + + if (netif_msg_probe(priv)) { + printk(KERN_INFO + "cpmac: device %s (regs: %p, irq: %d, phy: %s, mac: " + MAC_FMT ")\n", dev->name, (void *)mem->start, dev->irq, + priv->phy_name, MAC_ARG(dev->dev_addr)); + } + return 0; + +fail: + free_netdev(dev); + return rc; +} + +static int __devexit cpmac_remove(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + unregister_netdev(dev); + free_netdev(dev); + return 0; +} + +static struct platform_driver cpmac_driver = { + .driver.name = "cpmac", + .probe = cpmac_probe, + .remove = __devexit_p(cpmac_remove), +}; + +int __devinit cpmac_init(void) +{ + u32 mask; + int i, res; + + cpmac_mii.priv = ioremap(AR7_REGS_MDIO, 256); + + if (!cpmac_mii.priv) { + printk(KERN_ERR "Can't ioremap mdio registers\n"); + return -ENXIO; + } + +#warning FIXME: unhardcode gpio&reset bits + ar7_gpio_disable(26); + ar7_gpio_disable(27); + ar7_device_reset(AR7_RESET_BIT_CPMAC_LO); + ar7_device_reset(AR7_RESET_BIT_CPMAC_HI); + ar7_device_reset(AR7_RESET_BIT_EPHY); + + cpmac_mii.reset(&cpmac_mii); + + for (i = 0; i < 300000; i++) + if ((mask = cpmac_read(cpmac_mii.priv, CPMAC_MDIO_ALIVE))) + break; + else + cpu_relax(); + + mask &= 0x7fffffff; + if (mask & (mask - 1)) { + external_switch = 1; + mask = 0; + } + + cpmac_mii.phy_mask = ~(mask | 0x80000000); + + res = mdiobus_register(&cpmac_mii); + if (res) + goto fail_mii; + + res = platform_driver_register(&cpmac_driver); + if (res) + goto fail_cpmac; + + return 0; + +fail_cpmac: + mdiobus_unregister(&cpmac_mii); + +fail_mii: + iounmap(cpmac_mii.priv); + + return res; +} + +void __devexit cpmac_exit(void) +{ + platform_driver_unregister(&cpmac_driver); + mdiobus_unregister(&cpmac_mii); + iounmap(cpmac_mii.priv); +} + +module_init(cpmac_init); +module_exit(cpmac_exit); diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 0db5e6fabe73..558440c15b6c 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -168,7 +168,6 @@ static int gfar_probe(struct platform_device *pdev) struct gfar_private *priv = NULL; struct gianfar_platform_data *einfo; struct resource *r; - int idx; int err = 0; DECLARE_MAC_BUF(mac); @@ -261,7 +260,9 @@ static int gfar_probe(struct platform_device *pdev) dev->hard_start_xmit = gfar_start_xmit; dev->tx_timeout = gfar_timeout; dev->watchdog_timeo = TX_TIMEOUT; +#ifdef CONFIG_GFAR_NAPI netif_napi_add(dev, &priv->napi, gfar_poll, GFAR_DEV_WEIGHT); +#endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = gfar_netpoll; #endif @@ -931,9 +932,14 @@ tx_skb_fail: /* Returns 0 for success. */ static int gfar_enet_open(struct net_device *dev) { +#ifdef CONFIG_GFAR_NAPI + struct gfar_private *priv = netdev_priv(dev); +#endif int err; +#ifdef CONFIG_GFAR_NAPI napi_enable(&priv->napi); +#endif /* Initialize a bunch of registers */ init_registers(dev); @@ -943,13 +949,17 @@ static int gfar_enet_open(struct net_device *dev) err = init_phy(dev); if(err) { +#ifdef CONFIG_GFAR_NAPI napi_disable(&priv->napi); +#endif return err; } err = startup_gfar(dev); if (err) +#ifdef CONFIG_GFAR_NAPI napi_disable(&priv->napi); +#endif netif_start_queue(dev); @@ -1103,7 +1113,9 @@ static int gfar_close(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); +#ifdef CONFIG_GFAR_NAPI napi_disable(&priv->napi); +#endif stop_gfar(dev); diff --git a/drivers/net/ibm_emac/ibm_emac_mal.c b/drivers/net/ibm_emac/ibm_emac_mal.c index 4e49e8c4f871..dcd8826fc749 100644 --- a/drivers/net/ibm_emac/ibm_emac_mal.c +++ b/drivers/net/ibm_emac/ibm_emac_mal.c @@ -413,7 +413,10 @@ static int __init mal_probe(struct ocp_device *ocpdev) ocpdev->def->index); return -ENOMEM; } - mal->dcrbase = maldata->dcr_base; + + /* XXX This only works for native dcr for now */ + mal->dcrhost = dcr_map(NULL, maldata->dcr_base, 0); + mal->def = ocpdev->def; INIT_LIST_HEAD(&mal->poll_list); diff --git a/drivers/net/ibm_emac/ibm_emac_mal.h b/drivers/net/ibm_emac/ibm_emac_mal.h index 8f54d621994d..b8adbe6d4b01 100644 --- a/drivers/net/ibm_emac/ibm_emac_mal.h +++ b/drivers/net/ibm_emac/ibm_emac_mal.h @@ -191,7 +191,6 @@ struct mal_commac { }; struct ibm_ocp_mal { - int dcrbase; dcr_host_t dcrhost; struct list_head poll_list; @@ -209,12 +208,12 @@ struct ibm_ocp_mal { static inline u32 get_mal_dcrn(struct ibm_ocp_mal *mal, int reg) { - return dcr_read(mal->dcrhost, mal->dcrbase + reg); + return dcr_read(mal->dcrhost, reg); } static inline void set_mal_dcrn(struct ibm_ocp_mal *mal, int reg, u32 val) { - dcr_write(mal->dcrhost, mal->dcrbase + reg, val); + dcr_write(mal->dcrhost, reg, val); } /* Register MAL devices */ diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c index 58854117b1a9..39f4cb6b0cf3 100644 --- a/drivers/net/ibm_newemac/mal.c +++ b/drivers/net/ibm_newemac/mal.c @@ -461,6 +461,7 @@ static int __devinit mal_probe(struct of_device *ofdev, struct mal_instance *mal; int err = 0, i, bd_size; int index = mal_count++; + unsigned int dcr_base; const u32 *prop; u32 cfg; @@ -497,14 +498,14 @@ static int __devinit mal_probe(struct of_device *ofdev, } mal->num_rx_chans = prop[0]; - mal->dcr_base = dcr_resource_start(ofdev->node, 0); - if (mal->dcr_base == 0) { + dcr_base = dcr_resource_start(ofdev->node, 0); + if (dcr_base == 0) { printk(KERN_ERR "mal%d: can't find DCR resource!\n", index); err = -ENODEV; goto fail; } - mal->dcr_host = dcr_map(ofdev->node, mal->dcr_base, 0x100); + mal->dcr_host = dcr_map(ofdev->node, dcr_base, 0x100); if (!DCR_MAP_OK(mal->dcr_host)) { printk(KERN_ERR "mal%d: failed to map DCRs !\n", index); @@ -626,7 +627,7 @@ static int __devinit mal_probe(struct of_device *ofdev, fail2: dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma); fail_unmap: - dcr_unmap(mal->dcr_host, mal->dcr_base, 0x100); + dcr_unmap(mal->dcr_host, 0x100); fail: kfree(mal); diff --git a/drivers/net/ibm_newemac/mal.h b/drivers/net/ibm_newemac/mal.h index cb1a16d589fe..784edb8ea822 100644 --- a/drivers/net/ibm_newemac/mal.h +++ b/drivers/net/ibm_newemac/mal.h @@ -185,7 +185,6 @@ struct mal_commac { struct mal_instance { int version; - int dcr_base; dcr_host_t dcr_host; int num_tx_chans; /* Number of TX channels */ @@ -213,12 +212,12 @@ struct mal_instance { static inline u32 get_mal_dcrn(struct mal_instance *mal, int reg) { - return dcr_read(mal->dcr_host, mal->dcr_base + reg); + return dcr_read(mal->dcr_host, reg); } static inline void set_mal_dcrn(struct mal_instance *mal, int reg, u32 val) { - dcr_write(mal->dcr_host, mal->dcr_base + reg, val); + dcr_write(mal->dcr_host, reg, val); } /* Register MAL devices */ diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c index 3e5eca1aa987..a82d8f98383d 100644 --- a/drivers/net/irda/donauboe.c +++ b/drivers/net/irda/donauboe.c @@ -840,7 +840,7 @@ toshoboe_probe (struct toshoboe_cb *self) /* test 1: SIR filter and back to back */ - for (j = 0; j < (sizeof (bauds) / sizeof (int)); ++j) + for (j = 0; j < ARRAY_SIZE(bauds); ++j) { int fir = (j > 1); toshoboe_stopchip (self); diff --git a/drivers/net/jazzsonic.c b/drivers/net/jazzsonic.c index d3825c8ee994..5c154fe13859 100644 --- a/drivers/net/jazzsonic.c +++ b/drivers/net/jazzsonic.c @@ -208,7 +208,6 @@ static int __init jazz_sonic_probe(struct platform_device *pdev) struct sonic_local *lp; struct resource *res; int err = 0; - int i; DECLARE_MAC_BUF(mac); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index be25aa33971c..662b8d16803c 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -265,17 +265,16 @@ static __net_init int loopback_net_init(struct net *net) if (err) goto out_free_netdev; - err = 0; net->loopback_dev = dev; + return 0; -out: - if (err) - panic("loopback: Failed to register netdevice: %d\n", err); - return err; out_free_netdev: free_netdev(dev); - goto out; +out: + if (net == &init_net) + panic("loopback: Failed to register netdevice: %d\n", err); + return err; } static __net_exit void loopback_net_exit(struct net *net) diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c index d593175ab6f0..37707a0c0498 100644 --- a/drivers/net/mipsnet.c +++ b/drivers/net/mipsnet.c @@ -7,12 +7,12 @@ #define DEBUG #include <linux/init.h> +#include <linux/io.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/platform_device.h> -#include <asm/io.h> #include <asm/mips-boards/simint.h> #include "mipsnet.h" /* actual device IO mapping */ @@ -33,9 +33,8 @@ static int ioiocpy_frommipsnet(struct net_device *dev, unsigned char *kdata, if (available_len < len) return -EFAULT; - for (; len > 0; len--, kdata++) { + for (; len > 0; len--, kdata++) *kdata = inb(mipsnet_reg_address(dev, rxDataBuffer)); - } return inl(mipsnet_reg_address(dev, rxDataCount)); } @@ -47,16 +46,15 @@ static inline ssize_t mipsnet_put_todevice(struct net_device *dev, char *buf_ptr = skb->data; pr_debug("%s: %s(): telling MIPSNET txDataCount(%d)\n", - dev->name, __FUNCTION__, skb->len); + dev->name, __FUNCTION__, skb->len); outl(skb->len, mipsnet_reg_address(dev, txDataCount)); pr_debug("%s: %s(): sending data to MIPSNET txDataBuffer(%d)\n", - dev->name, __FUNCTION__, skb->len); + dev->name, __FUNCTION__, skb->len); - for (; count_to_go; buf_ptr++, count_to_go--) { + for (; count_to_go; buf_ptr++, count_to_go--) outb(*buf_ptr, mipsnet_reg_address(dev, txDataBuffer)); - } dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; @@ -67,7 +65,7 @@ static inline ssize_t mipsnet_put_todevice(struct net_device *dev, static int mipsnet_xmit(struct sk_buff *skb, struct net_device *dev) { pr_debug("%s:%s(): transmitting %d bytes\n", - dev->name, __FUNCTION__, skb->len); + dev->name, __FUNCTION__, skb->len); /* Only one packet at a time. Once TXDONE interrupt is serviced, the * queue will be restarted. @@ -83,7 +81,8 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count) struct sk_buff *skb; size_t len = count; - if (!(skb = alloc_skb(len + 2, GFP_KERNEL))) { + skb = alloc_skb(len + 2, GFP_KERNEL); + if (!skb) { dev->stats.rx_dropped++; return -ENOMEM; } @@ -96,7 +95,7 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count) skb->ip_summed = CHECKSUM_UNNECESSARY; pr_debug("%s:%s(): pushing RXed data to kernel\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); netif_rx(skb); dev->stats.rx_packets++; @@ -114,42 +113,44 @@ static irqreturn_t mipsnet_interrupt(int irq, void *dev_id) if (irq == dev->irq) { pr_debug("%s:%s(): irq %d for device\n", - dev->name, __FUNCTION__, irq); + dev->name, __FUNCTION__, irq); retval = IRQ_HANDLED; interruptFlags = inl(mipsnet_reg_address(dev, interruptControl)); pr_debug("%s:%s(): intCtl=0x%016llx\n", dev->name, - __FUNCTION__, interruptFlags); + __FUNCTION__, interruptFlags); if (interruptFlags & MIPSNET_INTCTL_TXDONE) { pr_debug("%s:%s(): got TXDone\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); outl(MIPSNET_INTCTL_TXDONE, mipsnet_reg_address(dev, interruptControl)); - // only one packet at a time, we are done. + /* only one packet at a time, we are done. */ netif_wake_queue(dev); } else if (interruptFlags & MIPSNET_INTCTL_RXDONE) { pr_debug("%s:%s(): got RX data\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); mipsnet_get_fromdev(dev, - inl(mipsnet_reg_address(dev, rxDataCount))); + inl(mipsnet_reg_address(dev, rxDataCount))); pr_debug("%s:%s(): clearing RX int\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); outl(MIPSNET_INTCTL_RXDONE, mipsnet_reg_address(dev, interruptControl)); } else if (interruptFlags & MIPSNET_INTCTL_TESTBIT) { pr_debug("%s:%s(): got test interrupt\n", - dev->name, __FUNCTION__); - // TESTBIT is cleared on read. - // And takes effect after a write with 0 + dev->name, __FUNCTION__); + /* + * TESTBIT is cleared on read. + * And takes effect after a write with 0 + */ outl(0, mipsnet_reg_address(dev, interruptControl)); } else { pr_debug("%s:%s(): no valid fags 0x%016llx\n", - dev->name, __FUNCTION__, interruptFlags); - // Maybe shared IRQ, just ignore, no clearing. + dev->name, __FUNCTION__, interruptFlags); + /* Maybe shared IRQ, just ignore, no clearing. */ retval = IRQ_NONE; } @@ -159,7 +160,7 @@ static irqreturn_t mipsnet_interrupt(int irq, void *dev_id) retval = IRQ_NONE; } return retval; -} //mipsnet_interrupt() +} static int mipsnet_open(struct net_device *dev) { @@ -171,18 +172,18 @@ static int mipsnet_open(struct net_device *dev) if (err) { pr_debug("%s: %s(): can't get irq %d\n", - dev->name, __FUNCTION__, dev->irq); + dev->name, __FUNCTION__, dev->irq); release_region(dev->base_addr, MIPSNET_IO_EXTENT); return err; } pr_debug("%s: %s(): got IO region at 0x%04lx and irq %d for dev.\n", - dev->name, __FUNCTION__, dev->base_addr, dev->irq); + dev->name, __FUNCTION__, dev->base_addr, dev->irq); netif_start_queue(dev); - // test interrupt handler + /* test interrupt handler */ outl(MIPSNET_INTCTL_TESTBIT, mipsnet_reg_address(dev, interruptControl)); @@ -199,8 +200,6 @@ static int mipsnet_close(struct net_device *dev) static void mipsnet_set_mclist(struct net_device *dev) { - // we don't do anything - return; } static int __init mipsnet_probe(struct device *dev) @@ -226,13 +225,13 @@ static int __init mipsnet_probe(struct device *dev) */ netdev->base_addr = 0x4200; netdev->irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB0 + - inl(mipsnet_reg_address(netdev, interruptInfo)); + inl(mipsnet_reg_address(netdev, interruptInfo)); - // Get the io region now, get irq on open() + /* Get the io region now, get irq on open() */ if (!request_region(netdev->base_addr, MIPSNET_IO_EXTENT, "mipsnet")) { pr_debug("%s: %s(): IO region {start: 0x%04lux, len: %d} " - "for dev is not availble.\n", netdev->name, - __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT); + "for dev is not availble.\n", netdev->name, + __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT); err = -EBUSY; goto out_free_netdev; } diff --git a/drivers/net/mipsnet.h b/drivers/net/mipsnet.h index 026c732024c9..0132c6714a40 100644 --- a/drivers/net/mipsnet.h +++ b/drivers/net/mipsnet.h @@ -9,32 +9,34 @@ /* * Id of this Net device, as seen by the core. */ -#define MIPS_NET_DEV_ID ((uint64_t) \ - ((uint64_t)'M'<< 0)| \ - ((uint64_t)'I'<< 8)| \ - ((uint64_t)'P'<<16)| \ - ((uint64_t)'S'<<24)| \ - ((uint64_t)'N'<<32)| \ - ((uint64_t)'E'<<40)| \ - ((uint64_t)'T'<<48)| \ - ((uint64_t)'0'<<56)) +#define MIPS_NET_DEV_ID ((uint64_t) \ + ((uint64_t) 'M' << 0)| \ + ((uint64_t) 'I' << 8)| \ + ((uint64_t) 'P' << 16)| \ + ((uint64_t) 'S' << 24)| \ + ((uint64_t) 'N' << 32)| \ + ((uint64_t) 'E' << 40)| \ + ((uint64_t) 'T' << 48)| \ + ((uint64_t) '0' << 56)) /* * Net status/control block as seen by sw in the core. * (Why not use bit fields? can't be bothered with cross-platform struct * packing.) */ -typedef struct _net_control_block { - /// dev info for probing - /// reads as MIPSNET%d where %d is some form of version - uint64_t devId; /*0x00 */ +struct net_control_block { + /* + * dev info for probing + * reads as MIPSNET%d where %d is some form of version + */ + uint64_t devId; /* 0x00 */ /* * read only busy flag. * Set and cleared by the Net Device to indicate that an rx or a tx * is in progress. */ - uint32_t busy; /*0x08 */ + uint32_t busy; /* 0x08 */ /* * Set by the Net Device. @@ -43,16 +45,16 @@ typedef struct _net_control_block { * rxDataBuffer. The value will decrease till 0 until all the data * from rxDataBuffer has been read. */ - uint32_t rxDataCount; /*0x0c */ + uint32_t rxDataCount; /* 0x0c */ #define MIPSNET_MAX_RXTX_DATACOUNT (1<<16) /* - * Settable from the MIPS core, cleared by the Net Device. - * The core should set the number of bytes it wants to send, - * then it should write those bytes of data to txDataBuffer. - * The device will clear txDataCount has been processed (not necessarily sent). + * Settable from the MIPS core, cleared by the Net Device. The core + * should set the number of bytes it wants to send, then it should + * write those bytes of data to txDataBuffer. The device will clear + * txDataCount has been processed (not necessarily sent). */ - uint32_t txDataCount; /*0x10 */ + uint32_t txDataCount; /* 0x10 */ /* * Interrupt control @@ -69,39 +71,42 @@ typedef struct _net_control_block { * To clear the test interrupt, write 0 to this register. */ uint32_t interruptControl; /*0x14 */ -#define MIPSNET_INTCTL_TXDONE ((uint32_t)(1<< 0)) -#define MIPSNET_INTCTL_RXDONE ((uint32_t)(1<< 1)) -#define MIPSNET_INTCTL_TESTBIT ((uint32_t)(1<<31)) -#define MIPSNET_INTCTL_ALLSOURCES (MIPSNET_INTCTL_TXDONE|MIPSNET_INTCTL_RXDONE|MIPSNET_INTCTL_TESTBIT) +#define MIPSNET_INTCTL_TXDONE ((uint32_t)(1 << 0)) +#define MIPSNET_INTCTL_RXDONE ((uint32_t)(1 << 1)) +#define MIPSNET_INTCTL_TESTBIT ((uint32_t)(1 << 31)) +#define MIPSNET_INTCTL_ALLSOURCES (MIPSNET_INTCTL_TXDONE | \ + MIPSNET_INTCTL_RXDONE | \ + MIPSNET_INTCTL_TESTBIT) /* - * Readonly core-specific interrupt info for the device to signal the core. - * The meaning of the contents of this field might change. - */ - /*###\todo: the whole memIntf interrupt scheme is messy: the device should have - * no control what so ever of what VPE/register set is being used. - * The MemIntf should only expose interrupt lines, and something in the - * config should be responsible for the line<->core/vpe bindings. + * Readonly core-specific interrupt info for the device to signal the + * core. The meaning of the contents of this field might change. + * + * TODO: the whole memIntf interrupt scheme is messy: the device should + * have no control what so ever of what VPE/register set is being + * used. The MemIntf should only expose interrupt lines, and + * something in the config should be responsible for the + * line<->core/vpe bindings. */ - uint32_t interruptInfo; /*0x18 */ + uint32_t interruptInfo; /* 0x18 */ /* * This is where the received data is read out. * There is more data to read until rxDataReady is 0. * Only 1 byte at this regs offset is used. */ - uint32_t rxDataBuffer; /*0x1c */ + uint32_t rxDataBuffer; /* 0x1c */ /* - * This is where the data to transmit is written. - * Data should be written for the amount specified in the txDataCount register. - * Only 1 byte at this regs offset is used. + * This is where the data to transmit is written. Data should be + * written for the amount specified in the txDataCount register. Only + * 1 byte at this regs offset is used. */ - uint32_t txDataBuffer; /*0x20 */ -} MIPS_T_NetControl; + uint32_t txDataBuffer; /* 0x20 */ +}; #define MIPSNET_IO_EXTENT 0x40 /* being generous */ -#define field_offset(field) ((int)&((MIPS_T_NetControl*)(0))->field) +#define field_offset(field) (offsetof(struct net_control_block, field)) #endif /* __MIPSNET_H */ diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index e8afa101433e..64c8151f2004 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -75,7 +75,7 @@ #include "myri10ge_mcp.h" #include "myri10ge_mcp_gen_header.h" -#define MYRI10GE_VERSION_STR "1.3.2-1.269" +#define MYRI10GE_VERSION_STR "1.3.2-1.287" MODULE_DESCRIPTION("Myricom 10G driver (10GbE)"); MODULE_AUTHOR("Maintainer: help@myri.com"); @@ -214,6 +214,8 @@ struct myri10ge_priv { unsigned long serial_number; int vendor_specific_offset; int fw_multicast_support; + unsigned long features; + u32 max_tso6; u32 read_dma; u32 write_dma; u32 read_write_dma; @@ -311,6 +313,7 @@ MODULE_PARM_DESC(myri10ge_wcfifo, "Enable WC Fifo when WC is enabled\n"); #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) static void myri10ge_set_multicast_list(struct net_device *dev); +static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev); static inline void put_be32(__be32 val, __be32 __iomem * p) { @@ -612,6 +615,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp) __be32 buf[16]; u32 dma_low, dma_high, size; int status, i; + struct myri10ge_cmd cmd; size = 0; status = myri10ge_load_hotplug_firmware(mgp, &size); @@ -688,6 +692,14 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp) dev_info(&mgp->pdev->dev, "handoff confirmed\n"); myri10ge_dummy_rdma(mgp, 1); + /* probe for IPv6 TSO support */ + mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, + &cmd, 0); + if (status == 0) { + mgp->max_tso6 = cmd.data0; + mgp->features |= NETIF_F_TSO6; + } return 0; } @@ -1047,7 +1059,8 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN; - /* allocate an skb to attach the page(s) to. */ + /* allocate an skb to attach the page(s) to. This is done + * after trying LRO, so as to avoid skb allocation overheads */ skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16); if (unlikely(skb == NULL)) { @@ -1217,7 +1230,8 @@ static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp) static int myri10ge_poll(struct napi_struct *napi, int budget) { - struct myri10ge_priv *mgp = container_of(napi, struct myri10ge_priv, napi); + struct myri10ge_priv *mgp = + container_of(napi, struct myri10ge_priv, napi); struct net_device *netdev = mgp->dev; struct myri10ge_rx_done *rx_done = &mgp->rx_done; int work_done; @@ -1382,6 +1396,18 @@ static int myri10ge_set_rx_csum(struct net_device *netdev, u32 csum_enabled) return 0; } +static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled) +{ + struct myri10ge_priv *mgp = netdev_priv(netdev); + unsigned long flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO); + + if (tso_enabled) + netdev->features |= flags; + else + netdev->features &= ~flags; + return 0; +} + static const char myri10ge_gstrings_stats[][ETH_GSTRING_LEN] = { "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", @@ -1506,7 +1532,7 @@ static const struct ethtool_ops myri10ge_ethtool_ops = { .set_rx_csum = myri10ge_set_rx_csum, .set_tx_csum = ethtool_op_set_tx_hw_csum, .set_sg = ethtool_op_set_sg, - .set_tso = ethtool_op_set_tso, + .set_tso = myri10ge_set_tso, .get_link = ethtool_op_get_link, .get_strings = myri10ge_get_strings, .get_sset_count = myri10ge_get_sset_count, @@ -2164,7 +2190,8 @@ again: pseudo_hdr_offset = cksum_offset + skb->csum_offset; /* If the headers are excessively large, then we must * fall back to a software checksum */ - if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) { + if (unlikely(!mss && (cksum_offset > 255 || + pseudo_hdr_offset > 127))) { if (skb_checksum_help(skb)) goto drop; cksum_offset = 0; @@ -2184,9 +2211,18 @@ again: /* negative cum_len signifies to the * send loop that we are still in the * header portion of the TSO packet. - * TSO header must be at most 134 bytes long */ + * TSO header can be at most 1KB long */ cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); + /* for IPv6 TSO, the checksum offset stores the + * TCP header length, to save the firmware from + * the need to parse the headers */ + if (skb_is_gso_v6(skb)) { + cksum_offset = tcp_hdrlen(skb); + /* Can only handle headers <= max_tso6 long */ + if (unlikely(-cum_len > mgp->max_tso6)) + return myri10ge_sw_tso(skb, dev); + } /* for TSO, pseudo_hdr_offset holds mss. * The firmware figures out where to put * the checksum by parsing the header. */ @@ -2301,10 +2337,12 @@ again: req++; count++; rdma_count++; - if (unlikely(cksum_offset > seglen)) - cksum_offset -= seglen; - else - cksum_offset = 0; + if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) { + if (unlikely(cksum_offset > seglen)) + cksum_offset -= seglen; + else + cksum_offset = 0; + } } if (frag_idx == frag_cnt) break; @@ -2387,6 +2425,41 @@ drop: } +static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev) +{ + struct sk_buff *segs, *curr; + struct myri10ge_priv *mgp = dev->priv; + int status; + + segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); + if (unlikely(IS_ERR(segs))) + goto drop; + + while (segs) { + curr = segs; + segs = segs->next; + curr->next = NULL; + status = myri10ge_xmit(curr, dev); + if (status != 0) { + dev_kfree_skb_any(curr); + if (segs != NULL) { + curr = segs; + segs = segs->next; + curr->next = NULL; + dev_kfree_skb_any(segs); + } + goto drop; + } + } + dev_kfree_skb_any(skb); + return 0; + +drop: + dev_kfree_skb_any(skb); + mgp->stats.tx_dropped += 1; + return 0; +} + static struct net_device_stats *myri10ge_get_stats(struct net_device *dev) { struct myri10ge_priv *mgp = netdev_priv(dev); @@ -2706,7 +2779,6 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) } #ifdef CONFIG_PM - static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state) { struct myri10ge_priv *mgp; @@ -2787,7 +2859,6 @@ abort_with_enabled: return -EIO; } - #endif /* CONFIG_PM */ static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp) @@ -2954,8 +3025,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mgp = netdev_priv(netdev); mgp->dev = netdev; - netif_napi_add(netdev, &mgp->napi, - myri10ge_poll, myri10ge_napi_weight); + netif_napi_add(netdev, &mgp->napi, myri10ge_poll, myri10ge_napi_weight); mgp->pdev = pdev; mgp->csum_flag = MXGEFW_FLAGS_CKSUM; mgp->pause = myri10ge_flow_control; @@ -3077,7 +3147,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->change_mtu = myri10ge_change_mtu; netdev->set_multicast_list = myri10ge_set_multicast_list; netdev->set_mac_address = myri10ge_set_mac_address; - netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; + netdev->features = mgp->features; if (dac_enabled) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/myri10ge/myri10ge_mcp.h b/drivers/net/myri10ge/myri10ge_mcp.h index a1d2a22296a9..58e57178c563 100644 --- a/drivers/net/myri10ge/myri10ge_mcp.h +++ b/drivers/net/myri10ge/myri10ge_mcp.h @@ -10,7 +10,7 @@ struct mcp_dma_addr { __be32 low; }; -/* 4 Bytes */ +/* 4 Bytes. 8 Bytes for NDIS drivers. */ struct mcp_slot { __sum16 checksum; __be16 length; @@ -205,8 +205,87 @@ enum myri10ge_mcp_cmd_type { /* same than DMA_TEST (same args) but abort with UNALIGNED on unaligned * chipset */ - MXGEFW_CMD_UNALIGNED_STATUS - /* return data = boolean, true if the chipset is known to be unaligned */ + MXGEFW_CMD_UNALIGNED_STATUS, + /* return data = boolean, true if the chipset is known to be unaligned */ + + MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, + /* data0 = number of big buffers to use. It must be 0 or a power of 2. + * 0 indicates that the NIC consumes as many buffers as they are required + * for packet. This is the default behavior. + * A power of 2 number indicates that the NIC always uses the specified + * number of buffers for each big receive packet. + * It is up to the driver to ensure that this value is big enough for + * the NIC to be able to receive maximum-sized packets. + */ + + MXGEFW_CMD_GET_MAX_RSS_QUEUES, + MXGEFW_CMD_ENABLE_RSS_QUEUES, + /* data0 = number of slices n (0, 1, ..., n-1) to enable + * data1 = interrupt mode. 0=share one INTx/MSI, 1=use one MSI-X per queue. + * If all queues share one interrupt, the driver must have set + * RSS_SHARED_INTERRUPT_DMA before enabling queues. + */ + MXGEFW_CMD_GET_RSS_SHARED_INTERRUPT_MASK_OFFSET, + MXGEFW_CMD_SET_RSS_SHARED_INTERRUPT_DMA, + /* data0, data1 = bus address lsw, msw */ + MXGEFW_CMD_GET_RSS_TABLE_OFFSET, + /* get the offset of the indirection table */ + MXGEFW_CMD_SET_RSS_TABLE_SIZE, + /* set the size of the indirection table */ + MXGEFW_CMD_GET_RSS_KEY_OFFSET, + /* get the offset of the secret key */ + MXGEFW_CMD_RSS_KEY_UPDATED, + /* tell nic that the secret key's been updated */ + MXGEFW_CMD_SET_RSS_ENABLE, + /* data0 = enable/disable rss + * 0: disable rss. nic does not distribute receive packets. + * 1: enable rss. nic distributes receive packets among queues. + * data1 = hash type + * 1: IPV4 + * 2: TCP_IPV4 + * 3: IPV4 | TCP_IPV4 + */ + + MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, + /* Return data = the max. size of the entire headers of a IPv6 TSO packet. + * If the header size of a IPv6 TSO packet is larger than the specified + * value, then the driver must not use TSO. + * This size restriction only applies to IPv6 TSO. + * For IPv4 TSO, the maximum size of the headers is fixed, and the NIC + * always has enough header buffer to store maximum-sized headers. + */ + + MXGEFW_CMD_SET_TSO_MODE, + /* data0 = TSO mode. + * 0: Linux/FreeBSD style (NIC default) + * 1: NDIS/NetBSD style + */ + + MXGEFW_CMD_MDIO_READ, + /* data0 = dev_addr (PMA/PMD or PCS ...), data1 = register/addr */ + MXGEFW_CMD_MDIO_WRITE, + /* data0 = dev_addr, data1 = register/addr, data2 = value */ + + MXGEFW_CMD_XFP_I2C_READ, + /* Starts to get a fresh copy of one byte or of the whole xfp i2c table, the + * obtained data is cached inside the xaui-xfi chip : + * data0 : "all" flag : 0 => get one byte, 1=> get 256 bytes, + * data1 : if (data0 == 0): index of byte to refresh [ not used otherwise ] + * The operation might take ~1ms for a single byte or ~65ms when refreshing all 256 bytes + * During the i2c operation, MXGEFW_CMD_XFP_I2C_READ or MXGEFW_CMD_XFP_BYTE attempts + * will return MXGEFW_CMD_ERROR_BUSY + */ + MXGEFW_CMD_XFP_BYTE, + /* Return the last obtained copy of a given byte in the xfp i2c table + * (copy cached during the last relevant MXGEFW_CMD_XFP_I2C_READ) + * data0 : index of the desired table entry + * Return data = the byte stored at the requested index in the table + */ + + MXGEFW_CMD_GET_VPUMP_OFFSET, + /* Return data = NIC memory offset of mcp_vpump_public_global */ + MXGEFW_CMD_RESET_VPUMP, + /* Resets the VPUMP state */ }; enum myri10ge_mcp_cmd_status { @@ -220,7 +299,10 @@ enum myri10ge_mcp_cmd_status { MXGEFW_CMD_ERROR_BAD_PORT, MXGEFW_CMD_ERROR_RESOURCES, MXGEFW_CMD_ERROR_MULTICAST, - MXGEFW_CMD_ERROR_UNALIGNED + MXGEFW_CMD_ERROR_UNALIGNED, + MXGEFW_CMD_ERROR_NO_MDIO, + MXGEFW_CMD_ERROR_XFP_FAILURE, + MXGEFW_CMD_ERROR_XFP_ABSENT }; #define MXGEFW_OLD_IRQ_DATA_LEN 40 diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c index 527f9dcc7f69..50e1ec67ef9c 100644 --- a/drivers/net/natsemi.c +++ b/drivers/net/natsemi.c @@ -1576,7 +1576,7 @@ static int netdev_open(struct net_device *dev) /* Set the timer to check for link beat. */ init_timer(&np->timer); - np->timer.expires = jiffies + NATSEMI_TIMER_FREQ; + np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); np->timer.data = (unsigned long)dev; np->timer.function = &netdev_timer; /* timer handler */ add_timer(&np->timer); @@ -1856,7 +1856,11 @@ static void netdev_timer(unsigned long data) next_tick = 1; } } - mod_timer(&np->timer, jiffies + next_tick); + + if (next_tick > 1) + mod_timer(&np->timer, round_jiffies(jiffies + next_tick)); + else + mod_timer(&np->timer, jiffies + next_tick); } static void dump_ring(struct net_device *dev) @@ -3310,13 +3314,19 @@ static int natsemi_resume (struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata (pdev); struct netdev_private *np = netdev_priv(dev); + int ret = 0; rtnl_lock(); if (netif_device_present(dev)) goto out; if (netif_running(dev)) { BUG_ON(!np->hands_off); - pci_enable_device(pdev); + ret = pci_enable_device(pdev); + if (ret < 0) { + dev_err(&pdev->dev, + "pci_enable_device() failed: %d\n", ret); + goto out; + } /* pci_power_on(pdev); */ napi_enable(&np->napi); @@ -3331,12 +3341,12 @@ static int natsemi_resume (struct pci_dev *pdev) spin_unlock_irq(&np->lock); enable_irq(dev->irq); - mod_timer(&np->timer, jiffies + 1*HZ); + mod_timer(&np->timer, round_jiffies(jiffies + 1*HZ)); } netif_device_attach(dev); out: rtnl_unlock(); - return 0; + return ret; } #endif /* CONFIG_PM */ diff --git a/drivers/net/ne-h8300.c b/drivers/net/ne-h8300.c index 368f2560856d..fbc7531d3c7d 100644 --- a/drivers/net/ne-h8300.c +++ b/drivers/net/ne-h8300.c @@ -93,7 +93,7 @@ static int __init init_reg_offset(struct net_device *dev,unsigned long base_addr bus_width = *(volatile unsigned char *)ABWCR; bus_width &= 1 << ((base_addr >> 21) & 7); - for (i = 0; i < sizeof(reg_offset) / sizeof(u32); i++) + for (i = 0; i < ARRAY_SIZE(reg_offset); i++) if (bus_width == 0) reg_offset[i] = i * 2 + 1; else @@ -115,7 +115,7 @@ static int h8300_ne_irq[] = {EXT_IRQ5}; static inline int init_dev(struct net_device *dev) { - if (h8300_ne_count < (sizeof(h8300_ne_base) / sizeof(unsigned long))) { + if (h8300_ne_count < ARRAY_SIZE(h8300_ne_base)) { dev->base_addr = h8300_ne_base[h8300_ne_count]; dev->irq = h8300_ne_irq[h8300_ne_count]; h8300_ne_count++; diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 43bfe7e6b6f5..ed1f9bbb2a32 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -6123,19 +6123,19 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) val = nr64(ESPC_PHY_TYPE); switch (np->port) { case 0: - val = (val & ESPC_PHY_TYPE_PORT0) >> + val8 = (val & ESPC_PHY_TYPE_PORT0) >> ESPC_PHY_TYPE_PORT0_SHIFT; break; case 1: - val = (val & ESPC_PHY_TYPE_PORT1) >> + val8 = (val & ESPC_PHY_TYPE_PORT1) >> ESPC_PHY_TYPE_PORT1_SHIFT; break; case 2: - val = (val & ESPC_PHY_TYPE_PORT2) >> + val8 = (val & ESPC_PHY_TYPE_PORT2) >> ESPC_PHY_TYPE_PORT2_SHIFT; break; case 3: - val = (val & ESPC_PHY_TYPE_PORT3) >> + val8 = (val & ESPC_PHY_TYPE_PORT3) >> ESPC_PHY_TYPE_PORT3_SHIFT; break; default: @@ -6143,9 +6143,9 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) np->port); return -EINVAL; } - niudbg(PROBE, "SPROM: PHY type %llx\n", (unsigned long long) val); + niudbg(PROBE, "SPROM: PHY type %x\n", val8); - switch (val) { + switch (val8) { case ESPC_PHY_TYPE_1G_COPPER: /* 1G copper, MII */ np->flags &= ~(NIU_FLAGS_FIBER | @@ -6175,8 +6175,7 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) break; default: - dev_err(np->device, PFX "Bogus SPROM phy type %llu\n", - (unsigned long long) val); + dev_err(np->device, PFX "Bogus SPROM phy type %u\n", val8); return -EINVAL; } @@ -6213,7 +6212,7 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) val = nr64(ESPC_MOD_STR_LEN); niudbg(PROBE, "SPROM: MOD_STR_LEN[%llu]\n", (unsigned long long) val); - if (val > 8 * 4) + if (val >= 8 * 4) return -EINVAL; for (i = 0; i < val; i += 4) { @@ -6229,7 +6228,7 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) val = nr64(ESPC_BD_MOD_STR_LEN); niudbg(PROBE, "SPROM: BD_MOD_STR_LEN[%llu]\n", (unsigned long long) val); - if (val > 4 * 4) + if (val >= 4 * 4) return -EINVAL; for (i = 0; i < val; i += 4) { diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c index 14361e885415..c65199df8a7f 100644 --- a/drivers/net/saa9730.c +++ b/drivers/net/saa9730.c @@ -97,13 +97,16 @@ static void evm_saa9730_unblock_lan_int(struct lan_saa9730_private *lp) &lp->evm_saa9730_regs->InterruptBlock1); } -static void __attribute_used__ show_saa9730_regs(struct lan_saa9730_private *lp) +static void __used show_saa9730_regs(struct net_device *dev) { + struct lan_saa9730_private *lp = netdev_priv(dev); int i, j; + printk("TxmBufferA = %p\n", lp->TxmBuffer[0][0]); printk("TxmBufferB = %p\n", lp->TxmBuffer[1][0]); printk("RcvBufferA = %p\n", lp->RcvBuffer[0][0]); printk("RcvBufferB = %p\n", lp->RcvBuffer[1][0]); + for (i = 0; i < LAN_SAA9730_BUFFERS; i++) { for (j = 0; j < LAN_SAA9730_TXM_Q_SIZE; j++) { printk("TxmBuffer[%d][%d] = %x\n", i, j, @@ -146,11 +149,13 @@ static void __attribute_used__ show_saa9730_regs(struct lan_saa9730_private *lp) readl(&lp->lan_saa9730_regs->RxCtl)); printk("lp->lan_saa9730_regs->RxStatus = %x\n", readl(&lp->lan_saa9730_regs->RxStatus)); + for (i = 0; i < LAN_SAA9730_CAM_DWORDS; i++) { writel(i, &lp->lan_saa9730_regs->CamAddress); printk("lp->lan_saa9730_regs->CamData = %x\n", readl(&lp->lan_saa9730_regs->CamData)); } + printk("dev->stats.tx_packets = %lx\n", dev->stats.tx_packets); printk("dev->stats.tx_errors = %lx\n", dev->stats.tx_errors); printk("dev->stats.tx_aborted_errors = %lx\n", @@ -855,7 +860,7 @@ static void lan_saa9730_tx_timeout(struct net_device *dev) /* Transmitter timeout, serious problems */ dev->stats.tx_errors++; printk("%s: transmit timed out, reset\n", dev->name); - /*show_saa9730_regs(lp); */ + /*show_saa9730_regs(dev); */ lan_saa9730_restart(lp); dev->trans_start = jiffies; diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index a679f4310ce1..8038f2882c9b 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -1461,7 +1461,6 @@ static irqreturn_t tc35815_interrupt(int irq, void *dev_id) } return IRQ_NONE; #else - struct tc35815_local *lp = dev->priv; int handled; u32 status; diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 8d04654f0c59..4e1b84e6d66a 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1906,7 +1906,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /************** pci *****************/ if ((err = pci_enable_device(pdev))) /* it trigers interrupt, dunno why. */ - RET(err); /* it's not a problem though */ + goto err_pci; /* it's not a problem though */ if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) && !(err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))) { @@ -2076,6 +2076,7 @@ err_out_res: pci_release_regions(pdev); err_dma: pci_disable_device(pdev); +err_pci: vfree(nic); RET(err); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 30b1cca8144c..014dc2cfe4d6 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -64,8 +64,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.83" -#define DRV_MODULE_RELDATE "October 10, 2007" +#define DRV_MODULE_VERSION "3.84" +#define DRV_MODULE_RELDATE "October 12, 2007" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -5056,6 +5056,12 @@ static void tg3_restore_pci_state(struct tg3 *tp) pci_write_config_dword(tp->pdev, TG3PCI_COMMAND, tp->pci_cmd); + if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) { + pci_write_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, + tp->pci_cacheline_sz); + pci_write_config_byte(tp->pdev, PCI_LATENCY_TIMER, + tp->pci_lat_timer); + } /* Make sure PCI-X relaxed ordering bit is clear. */ if (tp->pcix_cap) { u16 pcix_cmd; @@ -9034,7 +9040,7 @@ static int tg3_do_mem_test(struct tg3 *tp, u32 offset, u32 len) int i; u32 j; - for (i = 0; i < sizeof(test_pattern)/sizeof(u32); i++) { + for (i = 0; i < ARRAY_SIZE(test_pattern); i++) { for (j = 0; j < len; j += 4) { u32 val; diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index 9b9cd83fb8b6..41f34bb91cad 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -1041,7 +1041,7 @@ static struct InfoLeaf infoleaf_array[] = { {DC21142, dc21142_infoleaf}, {DC21143, dc21143_infoleaf} }; -#define INFOLEAF_SIZE (sizeof(infoleaf_array)/(sizeof(int)+sizeof(int *))) +#define INFOLEAF_SIZE ARRAY_SIZE(infoleaf_array) /* ** List the SROM info block functions @@ -1056,7 +1056,7 @@ static int (*dc_infoblock[])(struct net_device *dev, u_char, u_char *) = { compact_infoblock }; -#define COMPACT (sizeof(dc_infoblock)/sizeof(int *) - 1) +#define COMPACT (ARRAY_SIZE(dc_infoblock) - 1) /* ** Miscellaneous defines... diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index d00e7d41f6a5..bec413ba9bca 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -63,7 +63,7 @@ #define UGETH_MSG_DEFAULT (NETIF_MSG_IFUP << 1 ) - 1 void uec_set_ethtool_ops(struct net_device *netdev); - + static DEFINE_SPINLOCK(ugeth_lock); static struct { @@ -3454,9 +3454,12 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit u16 length, howmany = 0; u32 bd_status; u8 *bdBuffer; + struct net_device * dev; ugeth_vdbg("%s: IN", __FUNCTION__); + dev = ugeth->dev; + /* collect received buffers */ bd = ugeth->rxBd[rxQ]; diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index b39a541b2509..05df0a345b60 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -1342,11 +1342,11 @@ static int sdla_set_config(struct net_device *dev, struct ifmap *map) if (flp->initialized) return(-EINVAL); - for(i=0;i < sizeof(valid_port) / sizeof (int) ; i++) + for(i=0; i < ARRAY_SIZE(valid_port); i++) if (valid_port[i] == map->base_addr) break; - if (i == sizeof(valid_port) / sizeof(int)) + if (i == ARRAY_SIZE(valid_port)) return(-EINVAL); if (!request_region(map->base_addr, SDLA_IO_EXTENTS, dev->name)){ @@ -1487,12 +1487,12 @@ got_type: } } - for(i=0;i < sizeof(valid_mem) / sizeof (int) ; i++) + for(i=0; i < ARRAY_SIZE(valid_mem); i++) if (valid_mem[i] == map->mem_start) break; err = -EINVAL; - if (i == sizeof(valid_mem) / sizeof(int)) + if (i == ARRAY_SIZE(valid_mem)) goto fail2; if (flp->type == SDLA_S502A && (map->mem_start & 0xF000) >> 12 == 0x0E) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index f464b82c7d5f..7fd505cc4f7a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -74,22 +74,12 @@ struct netfront_info { struct napi_struct napi; - struct xen_netif_tx_front_ring tx; - struct xen_netif_rx_front_ring rx; - - spinlock_t tx_lock; - spinlock_t rx_lock; - unsigned int evtchn; + struct xenbus_device *xbdev; - /* Receive-ring batched refills. */ -#define RX_MIN_TARGET 8 -#define RX_DFL_MIN_TARGET 64 -#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - unsigned rx_min_target, rx_max_target, rx_target; - struct sk_buff_head rx_batch; - - struct timer_list rx_refill_timer; + spinlock_t tx_lock; + struct xen_netif_tx_front_ring tx; + int tx_ring_ref; /* * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries @@ -108,14 +98,23 @@ struct netfront_info { grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; unsigned tx_skb_freelist; + spinlock_t rx_lock ____cacheline_aligned_in_smp; + struct xen_netif_rx_front_ring rx; + int rx_ring_ref; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 8 +#define RX_DFL_MIN_TARGET 64 +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + unsigned rx_min_target, rx_max_target, rx_target; + struct sk_buff_head rx_batch; + + struct timer_list rx_refill_timer; + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; - struct xenbus_device *xbdev; - int tx_ring_ref; - int rx_ring_ref; - unsigned long rx_pfn_array[NET_RX_RING_SIZE]; struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; struct mmu_update rx_mmu[NET_RX_RING_SIZE]; diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index e8010a702e73..3ac080ee6e2f 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -5213,6 +5213,10 @@ static int __init gdth_init(void) #endif /* CONFIG_PCI */ TRACE2(("gdth_detect() %d controller detected\n", gdth_ctr_count)); + + if (list_empty(&gdth_instances)) + return -ENODEV; + #ifdef GDTH_STATISTICS TRACE2(("gdth_detect(): Initializing timer !\n")); init_timer(&gdth_timer); diff --git a/fs/Kconfig b/fs/Kconfig index bb02b39380a3..815d201d8600 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1755,6 +1755,14 @@ config SUNRPC config SUNRPC_GSS tristate +config SUNRPC_XPRT_RDMA + tristate "RDMA transport for sunrpc (EXPERIMENTAL)" + depends on SUNRPC && INFINIBAND && EXPERIMENTAL + default m + help + Adds a client RPC transport for supporting kernel NFS over RDMA + mounts, including Infiniband and iWARP. Experimental. + config SUNRPC_BIND34 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/fs/inode.c b/fs/inode.c index 29f5068f819b..f97de0aeb3b6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -142,6 +142,15 @@ static struct inode *alloc_inode(struct super_block *sb) return NULL; } + spin_lock_init(&inode->i_lock); + lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); + + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); + + init_rwsem(&inode->i_alloc_sem); + lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); + mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; @@ -190,8 +199,6 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); - mutex_init(&inode->i_mutex); - init_rwsem(&inode->i_alloc_sem); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); rwlock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -199,7 +206,6 @@ void inode_init_once(struct inode *inode) spin_lock_init(&inode->i_data.private_lock); INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap); INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); - spin_lock_init(&inode->i_lock); i_size_ordered_init(inode); #ifdef CONFIG_INOTIFY INIT_LIST_HEAD(&inode->inotify_watches); @@ -561,6 +567,18 @@ EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) { +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct file_system_type *type = inode->i_sb->s_type; + /* + * ensure nobody is actually holding i_mutex + */ + mutex_destroy(&inode->i_mutex); + mutex_init(&inode->i_mutex); + if (inode->i_mode & S_IFDIR) + lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); + else + lockdep_set_class(&inode->i_mutex, &type->i_mutex_key); +#endif /* * This is special! We do not need the spinlock * when clearing I_LOCK, because we're guaranteed diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 772b6531a2a2..8df5bac0b7a5 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -233,6 +233,8 @@ out: return ret; } +static struct lock_class_key jbd_handle_key; + /* Allocate a new handle. This should probably be in a slab... */ static handle_t *new_handle(int nblocks) { @@ -243,6 +245,8 @@ static handle_t *new_handle(int nblocks) handle->h_buffer_credits = nblocks; handle->h_ref = 1; + lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0); + return handle; } @@ -286,6 +290,9 @@ handle_t *journal_start(journal_t *journal, int nblocks) current->journal_info = NULL; handle = ERR_PTR(err); } + + lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); + return handle; } @@ -1411,6 +1418,8 @@ int journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } + lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); + jbd_free_handle(handle); return err; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 3353ed8421a7..908b23fadd05 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -10,6 +10,7 @@ #include <linux/utsname.h> #include <linux/kernel.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> #include <linux/lockd/sm_inter.h> @@ -132,7 +133,7 @@ nsm_create(void) .sin_port = 0, }; struct rpc_create_args args = { - .protocol = IPPROTO_UDP, + .protocol = XPRT_TRANSPORT_UDP, .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), .servername = "localhost", diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 5316e307a49d..633653bff944 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -62,8 +62,9 @@ static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c) } else { - printk(KERN_NOTICE - "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); + dprintk("lockd: bad cookie size %d (only cookies under " + "%d bytes are supported.)\n", + len, NLM_MAXCOOKIELEN); return NULL; } return p; @@ -84,8 +85,7 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f) unsigned int len; if ((len = ntohl(*p++)) != NFS2_FHSIZE) { - printk(KERN_NOTICE - "lockd: bad fhandle size %d (should be %d)\n", + dprintk("lockd: bad fhandle size %d (should be %d)\n", len, NFS2_FHSIZE); return NULL; } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 846fc1d639dd..43ff9397e6c6 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -64,8 +64,9 @@ nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c) } else { - printk(KERN_NOTICE - "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); + dprintk("lockd: bad cookie size %d (only cookies under " + "%d bytes are supported.)\n", + len, NLM_MAXCOOKIELEN); return NULL; } return p; @@ -86,8 +87,7 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f) memset(f->data, 0, sizeof(f->data)); f->size = ntohl(*p++); if (f->size > NFS_MAXFHSIZE) { - printk(KERN_NOTICE - "lockd: bad fhandle size %d (should be <=%d)\n", + dprintk("lockd: bad fhandle size %d (should be <=%d)\n", f->size, NFS_MAXFHSIZE); return NULL; } diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b55cb236cf74..df0f41e09885 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -16,4 +16,3 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o -nfs-objs := $(nfs-y) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a204484072f3..a532ee12740a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -23,6 +23,8 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> +#include <linux/sunrpc/xprtsock.h> +#include <linux/sunrpc/xprtrdma.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> @@ -340,7 +342,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_retries = 2; switch (proto) { - case IPPROTO_TCP: + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: if (!to->to_initval) to->to_initval = 60 * HZ; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) @@ -349,7 +352,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); to->to_exponential = 0; break; - case IPPROTO_UDP: + case XPRT_TRANSPORT_UDP: default: if (!to->to_initval) to->to_initval = 11 * HZ / 10; @@ -501,9 +504,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t /* * Initialise an NFS2 or NFS3 client */ -static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) +static int nfs_init_client(struct nfs_client *clp, + const struct nfs_parsed_mount_data *data) { - int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -522,8 +525,8 @@ static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data * * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ - error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, - RPC_AUTH_UNIX, 0); + error = nfs_create_rpc_client(clp, data->nfs_server.protocol, + data->timeo, data->retrans, RPC_AUTH_UNIX, 0); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -538,7 +541,8 @@ error: /* * Create a version 2 or 3 client */ -static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) +static int nfs_init_server(struct nfs_server *server, + const struct nfs_parsed_mount_data *data) { struct nfs_client *clp; int error, nfsvers = 2; @@ -551,7 +555,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat #endif /* Allocate or find a client reference we can use */ - clp = nfs_get_client(data->hostname, &data->addr, nfsvers); + clp = nfs_get_client(data->nfs_server.hostname, + &data->nfs_server.address, nfsvers); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); @@ -581,7 +586,7 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat if (error < 0) goto error; - error = nfs_init_server_rpcclient(server, data->pseudoflavor); + error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); if (error < 0) goto error; @@ -760,7 +765,7 @@ void nfs_free_server(struct nfs_server *server) * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, +struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, struct nfs_fh *mntfh) { struct nfs_server *server; @@ -906,7 +911,7 @@ error: * Create a version 4 volume record */ static int nfs4_init_server(struct nfs_server *server, - const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) + const struct nfs_parsed_mount_data *data) { int error; @@ -926,7 +931,7 @@ static int nfs4_init_server(struct nfs_server *server, server->acdirmin = data->acdirmin * HZ; server->acdirmax = data->acdirmax * HZ; - error = nfs_init_server_rpcclient(server, authflavour); + error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); /* Done */ dprintk("<-- nfs4_init_server() = %d\n", error); @@ -937,12 +942,7 @@ static int nfs4_init_server(struct nfs_server *server, * Create a version 4 volume record * - keyed on server and FSID */ -struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, - const char *hostname, - const struct sockaddr_in *addr, - const char *mntpath, - const char *ip_addr, - rpc_authflavor_t authflavour, +struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, struct nfs_fh *mntfh) { struct nfs_fattr fattr; @@ -956,13 +956,18 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, return ERR_PTR(-ENOMEM); /* Get a client record */ - error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour, - data->proto, data->timeo, data->retrans); + error = nfs4_set_client(server, + data->nfs_server.hostname, + &data->nfs_server.address, + data->client_address, + data->auth_flavors[0], + data->nfs_server.protocol, + data->timeo, data->retrans); if (error < 0) goto error; /* set up the general RPC client */ - error = nfs4_init_server(server, data, authflavour); + error = nfs4_init_server(server, data); if (error < 0) goto error; @@ -971,7 +976,7 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); /* Probe the root fh to retrieve its FSID */ - error = nfs4_path_walk(server, mntfh, mntpath); + error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); if (error < 0) goto error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c55a761c22bb..af8b235d405d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) + if (nfs_file_open_context(fl->fl_file) != ctx) continue; status = nfs4_lock_delegation_recall(state, fl); if (status >= 0) @@ -109,6 +109,7 @@ again: void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct rpc_cred *oldcred; if (delegation == NULL) return; @@ -116,11 +117,12 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st sizeof(delegation->stateid.data)); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; - put_rpccred(cred); + oldcred = delegation->cred; delegation->cred = get_rpccred(cred); delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; NFS_I(inode)->delegation_state = delegation->type; smp_wmb(); + put_rpccred(oldcred); } /* diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e4a04d16b8b0..8ec7fbd8240c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -200,9 +200,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) desc->timestamp = timestamp; desc->timestamp_valid = 1; SetPageUptodate(page); - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_mutex or some other mechanism. @@ -214,9 +211,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) unlock_page(page); return 0; error: - SetPageError(page); unlock_page(page); - nfs_zap_caches(inode); desc->error = error; return -EIO; } @@ -407,7 +402,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, struct file *file = desc->file; struct nfs_entry *entry = desc->entry; struct dentry *dentry = NULL; - unsigned long fileid; + u64 fileid; int loop_count = 0, res; @@ -418,7 +413,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ - fileid = nfs_fileid_to_ino_t(entry->ino); + fileid = entry->ino; /* Get a dentry if we have one */ if (dentry != NULL) @@ -428,11 +423,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, /* Use readdirplus info */ if (dentry != NULL && dentry->d_inode != NULL) { d_type = dt_type(dentry->d_inode); - fileid = dentry->d_inode->i_ino; + fileid = NFS_FILEID(dentry->d_inode); } res = filldir(dirent, entry->name, entry->len, - file->f_pos, fileid, d_type); + file->f_pos, nfs_compat_user_ino64(fileid), + d_type); if (res < 0) break; file->f_pos++; @@ -490,9 +486,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&inode->i_lock); desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -558,7 +551,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; + desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -623,7 +616,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) } if (offset != filp->f_pos) { filp->f_pos = offset; - ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + nfs_file_open_context(filp)->dir_cookie = 0; } out: mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); @@ -650,36 +643,18 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) */ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { - unsigned long verf; - if (IS_ROOT(dentry)) return 1; - verf = dentry->d_time; - if (nfs_caches_unstable(dir) - || verf != NFS_I(dir)->cache_change_attribute) + if (!nfs_verify_change_attribute(dir, dentry->d_time)) + return 0; + /* Revalidate nfsi->cache_change_attribute before we declare a match */ + if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + return 0; + if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; return 1; } -static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) -{ - dentry->d_time = verf; -} - -static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf) -{ - nfs_set_verifier(dentry, verf); -} - -/* - * Whenever an NFS operation succeeds, we know that the dentry - * is valid, so we update the revalidation timestamp. - */ -static inline void nfs_renew_times(struct dentry * dentry) -{ - dentry->d_time = jiffies; -} - /* * Return the intent data that applies to this particular path component * @@ -695,6 +670,19 @@ static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigne } /* + * Use intent information to check whether or not we're going to do + * an O_EXCL create using this path component. + */ +static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +{ + if (NFS_PROTO(dir)->version == 2) + return 0; + if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) + return 0; + return (nd->intent.open.flags & O_EXCL) != 0; +} + +/* * Inode and filehandle revalidation for lookups. * * We force revalidation in the cases where the VFS sets LOOKUP_REVAL, @@ -717,6 +705,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) goto out_force; + return 0; } return nfs_revalidate_inode(server, inode); out_force: @@ -759,7 +748,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) int error; struct nfs_fh fhandle; struct nfs_fattr fattr; - unsigned long verifier; parent = dget_parent(dentry); lock_kernel(); @@ -767,10 +755,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; - /* Revalidate parent directory attribute cache */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) - goto out_zap_parent; - if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; @@ -785,7 +769,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) } /* Force a full look up iff the parent directory has changed */ - if (nfs_check_verifier(dir, dentry)) { + if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, nd)) goto out_zap_parent; goto out_valid; @@ -794,7 +778,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) if (NFS_STALE(inode)) goto out_bad; - verifier = nfs_save_change_attribute(dir); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; @@ -803,8 +786,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; - nfs_renew_times(dentry); - nfs_refresh_verifier(dentry, verifier); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: unlock_kernel(); dput(parent); @@ -815,7 +797,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) out_zap_parent: nfs_zap_caches(dir); out_bad: - NFS_CACHEINV(dir); + nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ nfs_zap_caches(inode); @@ -872,8 +854,6 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) nfs_complete_unlink(dentry, inode); unlock_kernel(); } - /* When creating a negative dentry, we want to renew d_time */ - nfs_renew_times(dentry); iput(inode); } @@ -883,30 +863,6 @@ struct dentry_operations nfs_dentry_operations = { .d_iput = nfs_dentry_iput, }; -/* - * Use intent information to check whether or not we're going to do - * an O_EXCL create using this path component. - */ -static inline -int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) -{ - if (NFS_PROTO(dir)->version == 2) - return 0; - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) - return 0; - return (nd->intent.open.flags & O_EXCL) != 0; -} - -static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) -{ - struct nfs_server *server = NFS_SERVER(dir); - - if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) - /* Revalidate fsid using the parent directory */ - return __nfs_revalidate_inode(server, dir); - return 0; -} - static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; @@ -945,11 +901,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(dir, &fattr); - if (error < 0) { - res = ERR_PTR(error); - goto out_unlock; - } inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); res = (struct dentry *)inode; if (IS_ERR(res)) @@ -958,17 +909,10 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru no_entry: res = d_materialise_unique(dentry, inode); if (res != NULL) { - struct dentry *parent; if (IS_ERR(res)) goto out_unlock; - /* Was a directory renamed! */ - parent = dget_parent(res); - if (!IS_ROOT(parent)) - nfs_mark_for_revalidate(parent->d_inode); - dput(parent); dentry = res; } - nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: unlock_kernel(); @@ -1020,28 +964,16 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } dentry->d_op = NFS_PROTO(dir)->dentry_ops; - /* Let vfs_create() deal with O_EXCL */ + /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash + * the dentry. */ if (nd->intent.open.flags & O_EXCL) { - d_add(dentry, NULL); + d_instantiate(dentry, NULL); goto out; } /* Open the file on the server */ lock_kernel(); - /* Revalidate parent directory attribute cache */ - error = nfs_revalidate_inode(NFS_SERVER(dir), dir); - if (error < 0) { - res = ERR_PTR(error); - unlock_kernel(); - goto out; - } - - if (nd->intent.open.flags & O_CREAT) { - nfs_begin_data_update(dir); - res = nfs4_atomic_open(dir, dentry, nd); - nfs_end_data_update(dir); - } else - res = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); if (IS_ERR(res)) { error = PTR_ERR(res); @@ -1063,8 +995,6 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } } else if (res != NULL) dentry = res; - nfs_renew_times(dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out: return res; no_open: @@ -1076,7 +1006,6 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *parent = NULL; struct inode *inode = dentry->d_inode; struct inode *dir; - unsigned long verifier; int openflags, ret = 0; parent = dget_parent(dentry); @@ -1086,8 +1015,12 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ - if (inode == NULL) + if (inode == NULL) { + if (!nfs_neg_need_reval(dir, dentry, nd)) + ret = 1; goto out; + } + /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto no_open; @@ -1104,10 +1037,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) * change attribute *before* we do the RPC call. */ lock_kernel(); - verifier = nfs_save_change_attribute(dir); ret = nfs4_open_revalidate(dir, dentry, openflags, nd); - if (!ret) - nfs_refresh_verifier(dentry, verifier); unlock_kernel(); out: dput(parent); @@ -1133,6 +1063,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) .len = entry->len, }; struct inode *inode; + unsigned long verf = nfs_save_change_attribute(dir); switch (name.len) { case 2: @@ -1143,6 +1074,14 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) if (name.name[0] == '.') return dget(parent); } + + spin_lock(&dir->i_lock); + if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { + spin_unlock(&dir->i_lock); + return NULL; + } + spin_unlock(&dir->i_lock); + name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); if (dentry != NULL) { @@ -1183,12 +1122,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) dentry = alias; } - nfs_renew_times(dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - return dentry; out_renew: - nfs_renew_times(dentry); - nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir)); + nfs_set_verifier(dentry, verf); return dentry; } @@ -1198,32 +1133,40 @@ out_renew: int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; struct inode *inode; int error = -EACCES; + d_drop(dentry); + /* We may have been initialized further down */ if (dentry->d_inode) - return 0; + goto out; if (fhandle->size == 0) { - struct inode *dir = dentry->d_parent->d_inode; error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) - return error; + goto out_error; } + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) - return error; + goto out_error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); error = PTR_ERR(inode); if (IS_ERR(inode)) - return error; - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) - d_rehash(dentry); + goto out_error; + d_add(dentry, inode); +out: + dput(parent); return 0; +out_error: + nfs_mark_for_revalidate(dir); + dput(parent); + return error; } /* @@ -1249,13 +1192,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, open_flags = nd->intent.open.flags; lock_kernel(); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); - nfs_end_data_update(dir); if (error != 0) goto out_err; - nfs_renew_times(dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); return 0; out_err: @@ -1283,13 +1222,9 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) attr.ia_valid = ATTR_MODE; lock_kernel(); - nfs_begin_data_update(dir); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); - nfs_end_data_update(dir); if (status != 0) goto out_err; - nfs_renew_times(dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); return 0; out_err: @@ -1313,13 +1248,9 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) attr.ia_mode = mode | S_IFDIR; lock_kernel(); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); - nfs_end_data_update(dir); if (error != 0) goto out_err; - nfs_renew_times(dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); return 0; out_err: @@ -1336,12 +1267,10 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); lock_kernel(); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); - nfs_end_data_update(dir); unlock_kernel(); return error; @@ -1350,9 +1279,9 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) { static unsigned int sillycounter; - const int i_inosize = sizeof(dir->i_ino)*2; + const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs") + i_inosize + countersize - 1; + const int slen = sizeof(".nfs")+fileidsize+countersize-1; char silly[slen+1]; struct qstr qsilly; struct dentry *sdentry; @@ -1370,8 +1299,9 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; - sprintf(silly, ".nfs%*.*lx", - i_inosize, i_inosize, dentry->d_inode->i_ino); + sprintf(silly, ".nfs%*.*Lx", + fileidsize, fileidsize, + (unsigned long long)NFS_FILEID(dentry->d_inode)); /* Return delegation in anticipation of the rename */ nfs_inode_return_delegation(dentry->d_inode); @@ -1398,19 +1328,14 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) qsilly.name = silly; qsilly.len = strlen(silly); - nfs_begin_data_update(dir); if (dentry->d_inode) { - nfs_begin_data_update(dentry->d_inode); error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); nfs_mark_for_revalidate(dentry->d_inode); - nfs_end_data_update(dentry->d_inode); } else error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); - nfs_end_data_update(dir); if (!error) { - nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); d_move(dentry, sdentry); error = nfs_async_unlink(dir, dentry); @@ -1443,19 +1368,15 @@ static int nfs_safe_remove(struct dentry *dentry) goto out; } - nfs_begin_data_update(dir); if (inode != NULL) { nfs_inode_return_delegation(inode); - nfs_begin_data_update(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) drop_nlink(inode); nfs_mark_for_revalidate(inode); - nfs_end_data_update(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - nfs_end_data_update(dir); out: return error; } @@ -1493,7 +1414,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error) { - nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } else if (need_rehash) d_rehash(dentry); @@ -1548,9 +1468,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); kunmap_atomic(kaddr, KM_USER0); - nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); - nfs_end_data_update(dir); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, @@ -1590,15 +1508,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) dentry->d_parent->d_name.name, dentry->d_name.name); lock_kernel(); - nfs_begin_data_update(dir); - nfs_begin_data_update(inode); + d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { atomic_inc(&inode->i_count); - d_instantiate(dentry, inode); + d_add(dentry, inode); } - nfs_end_data_update(inode); - nfs_end_data_update(dir); unlock_kernel(); return error; } @@ -1701,22 +1616,16 @@ go_ahead: d_delete(new_dentry); } - nfs_begin_data_update(old_dir); - nfs_begin_data_update(new_dir); - nfs_begin_data_update(old_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); nfs_mark_for_revalidate(old_inode); - nfs_end_data_update(old_inode); - nfs_end_data_update(new_dir); - nfs_end_data_update(old_dir); out: if (rehash) d_rehash(rehash); if (!error) { d_move(old_dentry, new_dentry); - nfs_renew_times(new_dentry); - nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir)); + nfs_set_verifier(new_dentry, + nfs_save_change_attribute(new_dir)); } /* new dentry created? */ @@ -1842,7 +1751,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st return NULL; } -int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -1854,7 +1763,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; @@ -1909,7 +1818,7 @@ found: nfs_access_free_entry(entry); } -void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -1957,6 +1866,24 @@ out: return -EACCES; } +static int nfs_open_permission_mask(int openflags) +{ + int mask = 0; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; + return mask; +} + +int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); +} + int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct rpc_cred *cred; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fcf4d384610e..32fe97211eea 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size return -ENOMEM; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -510,7 +510,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode nfs_direct_write_reschedule(dreq); break; default: - nfs_end_data_update(inode); if (dreq->commit_data != NULL) nfs_commit_free(dreq->commit_data); nfs_direct_free_writedata(dreq); @@ -533,7 +532,6 @@ static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - nfs_end_data_update(inode); nfs_direct_free_writedata(dreq); nfs_zap_mapping(inode, inode->i_mapping); nfs_direct_complete(dreq); @@ -718,14 +716,12 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz sync = FLUSH_STABLE; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); - nfs_begin_data_update(inode); - rpc_clnt_sigmask(clnt, &oldset); result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); if (!result) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 579cf8a7d4a7..c664bb921425 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -33,6 +33,7 @@ #include <asm/system.h> #include "delegation.h" +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -55,6 +56,8 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); +static struct vm_operations_struct nfs_file_vm_ops; + const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, @@ -174,13 +177,38 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) } /* + * Helper for nfs_file_flush() and nfs_fsync() + * + * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to + * disk, but it retrieves and clears ctx->error after synching, despite + * the two being set at the same time in nfs_context_set_write_error(). + * This is because the former is used to notify the _next_ call to + * nfs_file_write() that a write error occured, and hence cause it to + * fall back to doing a synchronous write. + */ +static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) +{ + int have_error, status; + int ret = 0; + + have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + status = nfs_wb_all(inode); + have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + if (have_error) + ret = xchg(&ctx->error, 0); + if (!ret) + ret = status; + return ret; +} + +/* * Flush all dirty pages, and check for write errors. * */ static int nfs_file_flush(struct file *file, fl_owner_t id) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file->f_path.dentry->d_inode; int status; @@ -189,16 +217,11 @@ nfs_file_flush(struct file *file, fl_owner_t id) if ((file->f_mode & FMODE_WRITE) == 0) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - lock_kernel(); + /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); - } - unlock_kernel(); + status = nfs_do_fsync(ctx, inode); + if (!status) + nfs_revalidate_inode(NFS_SERVER(inode), inode); return status; } @@ -257,8 +280,11 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dentry->d_parent->d_name.name, dentry->d_name.name); status = nfs_revalidate_mapping(inode, file->f_mapping); - if (!status) - status = generic_file_mmap(file, vma); + if (!status) { + vma->vm_ops = &nfs_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + file_accessed(file); + } return status; } @@ -270,21 +296,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - int status; dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - lock_kernel(); - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - } - unlock_kernel(); - return status; + return nfs_do_fsync(ctx, inode); } /* @@ -333,7 +351,7 @@ static int nfs_launder_page(struct page *page) const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, - .set_page_dirty = nfs_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .writepage = nfs_writepage, .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, @@ -346,6 +364,43 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct file *filp = vma->vm_file; + unsigned pagelen; + int ret = -EINVAL; + + lock_page(page); + if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) + goto out_unlock; + pagelen = nfs_page_length(page); + if (pagelen == 0) + goto out_unlock; + ret = nfs_prepare_write(filp, page, 0, pagelen); + if (!ret) + ret = nfs_commit_write(filp, page, 0, pagelen); +out_unlock: + unlock_page(page); + return ret; +} + +static struct vm_operations_struct nfs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = nfs_vm_page_mkwrite, +}; + +static int nfs_need_sync_write(struct file *filp, struct inode *inode) +{ + struct nfs_open_context *ctx; + + if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) + return 1; + ctx = nfs_file_open_context(filp); + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) + return 1; + return 0; +} + static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -382,8 +437,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); /* Return error values for O_SYNC and IS_SYNC() */ - if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { - int err = nfs_fsync(iocb->ki_filp, dentry, 1); + if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { + int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) result = err; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 71a49c3acabd..035c769b715e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -49,6 +49,11 @@ #define NFSDBG_FACILITY NFSDBG_VFS +#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 + +/* Default is to see 64-bit inode numbers */ +static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; + static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); @@ -62,6 +67,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } +/** + * nfs_compat_user_ino64 - returns the user-visible inode number + * @fileid: 64-bit fileid + * + * This function returns a 32-bit inode number if the boot parameter + * nfs.enable_ino64 is zero. + */ +u64 nfs_compat_user_ino64(u64 fileid) +{ + int ino; + + if (enable_ino64) + return fileid; + ino = fileid; + if (sizeof(ino) < sizeof(fileid)) + ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8; + return ino; +} + int nfs_write_inode(struct inode *inode, int sync) { int ret; @@ -85,7 +109,6 @@ void nfs_clear_inode(struct inode *inode) */ BUG_ON(nfs_have_writebacks(inode)); BUG_ON(!list_empty(&NFS_I(inode)->open_files)); - BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); } @@ -118,8 +141,8 @@ static void nfs_zap_caches_locked(struct inode *inode) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) @@ -156,6 +179,13 @@ static void nfs_zap_acl_cache(struct inode *inode) spin_unlock(&inode->i_lock); } +void nfs_invalidate_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); +} + /* * Invalidate, but do not unhash, the inode. * NB: must be called with inode->i_lock held! @@ -338,7 +368,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) return 0; lock_kernel(); - nfs_begin_data_update(inode); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { filemap_write_and_wait(inode->i_mapping); @@ -352,7 +381,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); - nfs_end_data_update(inode); unlock_kernel(); return error; } @@ -431,7 +459,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* Flush out writes to the server in order to update c/mtime */ if (S_ISREG(inode->i_mode)) - nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); + nfs_wb_nocommit(inode); /* * We may force a getattr if the user cares about atime. @@ -450,8 +478,10 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); else err = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (!err) + if (!err) { generic_fillattr(inode, stat); + stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + } return err; } @@ -536,7 +566,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_path.dentry->d_inode; - struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { filp->private_data = NULL; @@ -598,16 +628,10 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = nfs_wait_on_inode(inode); if (status < 0) goto out; - if (NFS_STALE(inode)) { - status = -ESTALE; - /* Do we trust the cached ESTALE? */ - if (NFS_ATTRTIMEO(inode) != 0) { - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) { - /* no */ - } else - goto out; - } - } + + status = -ESTALE; + if (NFS_STALE(inode)) + goto out; status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); if (status != 0) { @@ -654,7 +678,7 @@ int nfs_attribute_timeout(struct inode *inode) if (nfs_have_delegation(inode, FMODE_READ)) return 0; - return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); + return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } /** @@ -683,11 +707,8 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa } spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->i_mode)) memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - /* This ensures we revalidate child dentries */ - nfsi->cache_change_attribute = jiffies; - } spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", @@ -756,56 +777,27 @@ out: return ret; } -/** - * nfs_begin_data_update - * @inode - pointer to inode - * Declare that a set of operations will update file data on the server - */ -void nfs_begin_data_update(struct inode *inode) -{ - atomic_inc(&NFS_I(inode)->data_updates); -} - -/** - * nfs_end_data_update - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will mark the inode as immediately needing revalidation - * of its attribute cache. - */ -void nfs_end_data_update(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - /* Directories: invalidate page cache */ - if (S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); - } - nfsi->cache_change_attribute = jiffies; - atomic_dec(&nfsi->data_updates); -} - static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - unsigned long now = jiffies; + if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 && + nfsi->change_attr == fattr->pre_change_attr) { + nfsi->change_attr = fattr->change_attr; + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + } /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_WCC) != 0) { - if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - nfsi->cache_change_attribute = now; - } if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - nfsi->cache_change_attribute = now; + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; } - if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { + if (inode->i_size == fattr->pre_size && nfsi->npages == 0) inode->i_size = fattr->size; - nfsi->cache_change_attribute = now; - } } } @@ -822,7 +814,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; - int data_unstable; + unsigned long invalid = 0; /* Has the inode gone and changed behind our back? */ @@ -831,37 +823,41 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return -EIO; } - /* Are we in the process of updating data on the server? */ - data_unstable = nfs_caches_unstable(inode); - /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && nfsi->change_attr != fattr->change_attr) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize && nfsi->npages == 0) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_ATTR; if (!timespec_equal(&inode->i_atime, &fattr->atime)) - nfsi->cache_validity |= NFS_INO_INVALID_ATIME; + invalid |= NFS_INO_INVALID_ATIME; + + if (invalid != 0) + nfsi->cache_validity |= invalid; + else + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_PAGECACHE); nfsi->read_cache_jiffies = fattr->time_start; return 0; @@ -911,17 +907,41 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - int status = 0; spin_lock(&inode->i_lock); - if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { - nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - goto out; - } - status = nfs_update_inode(inode, fattr); -out: + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); - return status; + return nfs_refresh_inode(inode, fattr); +} + +/** + * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. Fake up + * weak cache consistency data, if none exist. + * + * This function is mainly designed to be used by the ->write_done() functions. + */ +int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +{ + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + (fattr->valid & NFS_ATTR_WCC_V4) == 0) { + fattr->pre_change_attr = NFS_I(inode)->change_attr; + fattr->valid |= NFS_ATTR_WCC_V4; + } + if ((fattr->valid & NFS_ATTR_FATTR) != 0 && + (fattr->valid & NFS_ATTR_WCC) == 0) { + memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); + memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); + fattr->pre_size = inode->i_size; + fattr->valid |= NFS_ATTR_WCC; + } + return nfs_post_op_update_inode(inode, fattr); } /* @@ -941,9 +961,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; - unsigned int invalid = 0; + unsigned long invalid = 0; unsigned long now = jiffies; - int data_stable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -968,57 +987,51 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->time_start; - nfsi->last_updated = now; - /* Fix a wraparound issue with nfsi->cache_change_attribute */ - if (time_before(now, nfsi->cache_change_attribute)) - nfsi->cache_change_attribute = now - 600*HZ; - - /* Are we racing with known updates of the metadata on the server? */ - data_stable = nfs_verify_change_attribute(inode, fattr->time_start); - if (data_stable) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME); + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); + /* More cache consistency checks */ + if (!(fattr->valid & NFS_ATTR_FATTR_V4)) { + /* NFSv2/v3: Check if the mtime agrees */ + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfsi->cache_change_attribute = now; + } + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + } else if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = now; + } + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); if (new_isize != cur_isize) { - /* Do we perhaps have any outstanding writes? */ - if (nfsi->npages == 0) { - /* No, but did we race with nfs_end_data_update()? */ - if (data_stable) { - inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_DATA; - } - invalid |= NFS_INO_INVALID_ATTR; - } else if (new_isize > cur_isize) { + /* Do we perhaps have any outstanding writes, or has + * the file grown beyond our last write? */ + if (nfsi->npages == 0 || new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - nfsi->cache_change_attribute = now; dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } - /* Check if the mtime agrees */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - dprintk("NFS: mtime change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute = now; - } - /* If ctime has changed we should definitely clear access+acl caches */ - if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { - invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - nfsi->cache_change_attribute = now; - } + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + nfsi->change_attr = fattr->change_attr; if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || @@ -1039,31 +1052,29 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = fattr->du.nfs2.blocks; } - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = now; - } - /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { - if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) - nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = now; + nfsi->last_updated = now; + } else { + if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; + } + /* + * Avoid jiffy wraparound issues with nfsi->last_updated + */ + if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) + nfsi->last_updated = nfsi->read_cache_jiffies; } + invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - if (data_stable) - invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); if (!nfs_have_delegation(inode, FMODE_READ) || (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; @@ -1152,7 +1163,6 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); - atomic_set(&nfsi->data_updates, 0); nfsi->ncommit = 0; nfsi->npages = 0; nfs4_init_once(nfsi); @@ -1249,6 +1259,7 @@ static void __exit exit_nfs_fs(void) /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); MODULE_LICENSE("GPL"); +module_param(enable_ino64, bool, 0644); module_init(init_nfs_fs) module_exit(exit_nfs_fs) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 76cf55d57101..f3acf48412be 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -5,8 +5,6 @@ #include <linux/mount.h> struct nfs_string; -struct nfs_mount_data; -struct nfs4_mount_data; /* Maximum number of readahead requests * FIXME: this should really be a sysctl so that users may tune it to suit @@ -27,20 +25,50 @@ struct nfs_clone_mount { rpc_authflavor_t authflavor; }; +/* + * In-kernel mount arguments + */ +struct nfs_parsed_mount_data { + int flags; + int rsize, wsize; + int timeo, retrans; + int acregmin, acregmax, + acdirmin, acdirmax; + int namlen; + unsigned int bsize; + unsigned int auth_flavor_len; + rpc_authflavor_t auth_flavors[1]; + char *client_address; + + struct { + struct sockaddr_in address; + char *hostname; + unsigned int program; + unsigned int version; + unsigned short port; + int protocol; + } mount_server; + + struct { + struct sockaddr_in address; + char *hostname; + char *export_path; + unsigned int program; + int protocol; + } nfs_server; +}; + /* client.c */ extern struct rpc_program nfs_program; extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); -extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, - struct nfs_fh *); -extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, - const char *, - const struct sockaddr_in *, - const char *, - const char *, - rpc_authflavor_t, - struct nfs_fh *); +extern struct nfs_server *nfs_create_server( + const struct nfs_parsed_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_server( + const struct nfs_parsed_mount_data *, + struct nfs_fh *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern void nfs_free_server(struct nfs_server *server); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c5fce7567200..668ab96c7b59 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -251,6 +251,7 @@ nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, count); + req->rq_rcv_buf.flags |= XDRBUF_READ; return 0; } @@ -271,7 +272,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) res->eof = 0; hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - printk(KERN_WARNING "NFS: READ reply header overflowed:" + dprintk("NFS: READ reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -281,7 +282,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { - printk(KERN_WARNING "NFS: server cheating in read reply: " + dprintk("NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); count = recvd; } @@ -313,6 +314,7 @@ nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) /* Copy the page array */ xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + sndbuf->flags |= XDRBUF_WRITE; return 0; } @@ -431,7 +433,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - printk(KERN_WARNING "NFS: READDIR reply header overflowed:" + dprintk("NFS: READDIR reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -454,7 +456,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) len = ntohl(*p++); p += XDR_QUADLEN(len) + 1; /* name plus cookie */ if (len > NFS2_MAXNAMLEN) { - printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", + dprintk("NFS: giant filename in readdir (len 0x%x)!\n", len); goto err_unmap; } @@ -471,7 +473,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) entry[0] = entry[1] = 0; /* truncate listing ? */ if (!nr) { - printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + dprintk("NFS: readdir reply truncated!\n"); entry[1] = 1; } goto out; @@ -583,12 +585,12 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) /* Convert length of symlink */ len = ntohl(*p++); if (len >= rcvbuf->page_len || len <= 0) { - dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); + dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - printk(KERN_WARNING "NFS: READLINK reply header overflowed:" + dprintk("NFS: READLINK reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -597,7 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) } recvd = req->rq_rcv_buf.len - hdrlen; if (recvd < len) { - printk(KERN_WARNING "NFS: server cheating in readlink reply: " + dprintk("NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } @@ -695,7 +697,7 @@ nfs_stat_to_errno(int stat) if (nfs_errtbl[i].stat == stat) return nfs_errtbl[i].errno; } - printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); return nfs_errtbl[i].errno; } diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7322da4d2055..9b7362565c0c 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -317,13 +317,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, } dprintk("NFS call setacl\n"); - nfs_begin_data_update(inode); msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; status = rpc_call_sync(server->client_acl, &msg, 0); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; spin_unlock(&inode->i_lock); - nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); /* pages may have been allocated at the xdr layer. */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c7ca5d70870b..4cdc2361a669 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -166,6 +166,7 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_argp = fhandle; @@ -173,8 +174,6 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } dprintk("NFS reply lookup: %d\n", status); - if (status >= 0) - status = nfs_refresh_inode(dir, &dir_attr); return status; } @@ -607,6 +606,9 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + nfs_invalidate_atime(dir); + nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply readdir: %d\n", status); return status; @@ -724,9 +726,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; - /* Call back common NFS readpage processing */ - if (task->tk_status >= 0) - nfs_refresh_inode(data->inode, &data->fattr); + + nfs_invalidate_atime(data->inode); + nfs_refresh_inode(data->inode, &data->fattr); return 0; } @@ -747,7 +749,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); return 0; } @@ -775,8 +777,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; - if (task->tk_status >= 0) - nfs_post_op_update_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); return 0; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d9e08f0cf2a0..616d3267b7e7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -346,6 +346,7 @@ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, count); + req->rq_rcv_buf.flags |= XDRBUF_READ; return 0; } @@ -367,6 +368,7 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) /* Copy the page array */ xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); + sndbuf->flags |= XDRBUF_WRITE; return 0; } @@ -524,7 +526,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - printk(KERN_WARNING "NFS: READDIR reply header overflowed:" + dprintk("NFS: READDIR reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -547,7 +549,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res len = ntohl(*p++); /* string length */ p += XDR_QUADLEN(len) + 2; /* name + cookie */ if (len > NFS3_MAXNAMLEN) { - printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", + dprintk("NFS: giant filename in readdir (len %x)!\n", len); goto err_unmap; } @@ -567,7 +569,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res goto short_pkt; len = ntohl(*p++); if (len > NFS3_FHSIZE) { - printk(KERN_WARNING "NFS: giant filehandle in " + dprintk("NFS: giant filehandle in " "readdir (len %x)!\n", len); goto err_unmap; } @@ -588,7 +590,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res entry[0] = entry[1] = 0; /* truncate listing ? */ if (!nr) { - printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + dprintk("NFS: readdir reply truncated!\n"); entry[1] = 1; } goto out; @@ -826,22 +828,23 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) /* Convert length of symlink */ len = ntohl(*p++); if (len >= rcvbuf->page_len || len <= 0) { - dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); + dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - printk(KERN_WARNING "NFS: READLINK reply header overflowed:" + dprintk("NFS: READLINK reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); + dprintk("NFS: READLINK header is short. " + "iovec will be shifted.\n"); xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } recvd = req->rq_rcv_buf.len - hdrlen; if (recvd < len) { - printk(KERN_WARNING "NFS: server cheating in readlink reply: " + dprintk("NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } @@ -876,13 +879,13 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) ocount = ntohl(*p++); if (ocount != count) { - printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n"); + dprintk("NFS: READ count doesn't match RPC opaque count.\n"); return -errno_NFSERR_IO; } hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - printk(KERN_WARNING "NFS: READ reply header overflowed:" + dprintk("NFS: READ reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -892,7 +895,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { - printk(KERN_WARNING "NFS: server cheating in read reply: " + dprintk("NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); count = recvd; res->eof = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4b90e17555a9..cb99fd90a9ac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -62,10 +62,8 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); -static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -177,7 +175,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, dentry->d_inode->i_ino); + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode)); } *p++ = xdr_one; /* next */ @@ -189,7 +187,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); readdir->pgbase = (char *)p - (char *)start; readdir->count -= readdir->pgbase; @@ -211,8 +209,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; - if (cinfo->before == nfsi->change_attr && cinfo->atomic) - nfsi->change_attr = cinfo->after; + if (!cinfo->atomic || cinfo->before != nfsi->change_attr) + nfsi->cache_change_attribute = jiffies; + nfsi->change_attr = cinfo->after; spin_unlock(&dir->i_lock); } @@ -454,7 +453,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); lock_kernel(); - ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); + ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); unlock_kernel(); if (ret != 0) goto out; @@ -948,36 +947,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) -{ - struct nfs_access_entry cache; - int mask = 0; - int status; - - if (openflags & FMODE_READ) - mask |= MAY_READ; - if (openflags & FMODE_WRITE) - mask |= MAY_WRITE; - if (openflags & FMODE_EXEC) - mask |= MAY_EXEC; - status = nfs_access_get_cached(inode, cred, &cache); - if (status == 0) - goto out; - - /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; - cache.cred = cred; - cache.jiffies = jiffies; - status = _nfs4_proc_access(inode, &cache); - if (status != 0) - return status; - nfs_access_add_cache(inode, &cache); -out: - if ((cache.mask & mask) == mask) - return 0; - return -EACCES; -} - static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; @@ -1381,7 +1350,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct /* If the open_intent is for execute, we have an extra check to make */ if (nd->intent.open.flags & FMODE_EXEC) { - ret = _nfs4_do_access(state->inode, + ret = nfs_may_open(state->inode, state->owner->so_cred, nd->intent.open.flags); if (ret < 0) @@ -1390,7 +1359,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); ctx->state = state; return 0; } @@ -1428,13 +1397,16 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { - if (PTR_ERR(state) == -ENOENT) + if (PTR_ERR(state) == -ENOENT) { d_add(dentry, NULL); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } return (struct dentry *)state; } res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) path.dentry = res; + nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); nfs4_intent_set_file(nd, &path, state); return res; } @@ -1468,6 +1440,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } } if (state->inode == dentry->d_inode) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs4_intent_set_file(nd, &path, state); return 1; } @@ -1757,10 +1730,16 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) { + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; struct nfs4_accessargs args = { .fh = NFS_FH(inode), + .bitmask = server->attr_bitmask, + }; + struct nfs4_accessres res = { + .server = server, + .fattr = &fattr, }; - struct nfs4_accessres res = { 0 }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, @@ -1786,6 +1765,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry if (mode & MAY_EXEC) args.access |= NFS4_ACCESS_EXECUTE; } + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (!status) { entry->mask = 0; @@ -1795,6 +1775,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry entry->mask |= MAY_WRITE; if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) entry->mask |= MAY_EXEC; + nfs_refresh_inode(inode, &fattr); } return status; } @@ -1900,11 +1881,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } state = nfs4_do_open(dir, &path, flags, sattr, cred); put_rpccred(cred); + d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); goto out; } - d_instantiate(dentry, igrab(state->inode)); + d_add(dentry, igrab(state->inode)); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (flags & O_EXCL) { struct nfs_fattr fattr; status = nfs4_do_setattr(state->inode, &fattr, sattr, state); @@ -2218,6 +2201,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + + nfs_invalidate_atime(dir); + dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } @@ -2414,6 +2400,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) rpc_restart_call(task); return -EAGAIN; } + + nfs_invalidate_atime(data->inode); if (task->tk_status > 0) renew_lease(server, data->timestamp); return 0; @@ -2443,7 +2431,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); } return 0; } @@ -2485,8 +2473,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) rpc_restart_call(task); return -EAGAIN; } - if (task->tk_status >= 0) - nfs_post_op_update_inode(inode, data->res.fattr); + nfs_refresh_inode(inode, data->res.fattr); return 0; } @@ -3056,7 +3043,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status == 0) { status = data->rpc_status; if (status == 0) - nfs_post_op_update_inode(inode, &data->fattr); + nfs_refresh_inode(inode, &data->fattr); } rpc_put_task(task); return status; @@ -3303,7 +3290,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = -ENOMEM; if (seqid == NULL) goto out; - task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); + task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) goto out; @@ -3447,7 +3434,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f int ret; dprintk("%s: begin!\n", __FUNCTION__); - data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, + data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), fl->fl_u.nfs4_fl.owner); if (data == NULL) return -ENOMEM; @@ -3573,7 +3560,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) int status; /* verify open state */ - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); state = ctx->state; if (request->fl_start < 0 || request->fl_end < 0) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3e4adf8c8312..bfb36261cecb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; status = ops->recover_lock(state, fl); if (status >= 0) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index badd73b7ca12..51dd3804866f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -376,10 +376,12 @@ static int nfs4_stat_to_errno(int); decode_locku_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_access_maxsz) + encode_access_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - decode_access_maxsz) + decode_access_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -562,7 +564,6 @@ struct compound_hdr { #define RESERVE_SPACE(nbytes) do { \ p = xdr_reserve_space(xdr, nbytes); \ - if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ BUG_ON(!p); \ } while (0) @@ -628,8 +629,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s if (iap->ia_valid & ATTR_UID) { owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); if (owner_namelen < 0) { - printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", - iap->ia_uid); + dprintk("nfs: couldn't resolve uid %d to string\n", + iap->ia_uid); /* XXX */ strcpy(owner_name, "nobody"); owner_namelen = sizeof("nobody") - 1; @@ -640,8 +641,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s if (iap->ia_valid & ATTR_GID) { owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); if (owner_grouplen < 0) { - printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", - iap->ia_gid); + dprintk("nfs: couldn't resolve gid %d to string\n", + iap->ia_gid); strcpy(owner_group, "nobody"); owner_grouplen = sizeof("nobody") - 1; /* goto out; */ @@ -711,7 +712,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s * Now we backfill the bitmap and the attribute buffer length. */ if (len != ((char *)p - (char *)q) + 4) { - printk ("encode_attr: Attr length calculation error! %u != %Zu\n", + printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", len, ((char *)p - (char *)q) + 4); BUG(); } @@ -1376,14 +1377,20 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) == 0) - status = encode_access(&xdr, args->access); + status = encode_putfh(&xdr, args->fh); + if (status != 0) + goto out; + status = encode_access(&xdr, args->access); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: return status; } @@ -1857,6 +1864,7 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->count); + req->rq_rcv_buf.flags |= XDRBUF_READ; out: return status; } @@ -1933,6 +1941,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea status = encode_write(&xdr, args); if (status) goto out; + req->rq_snd_buf.flags |= XDRBUF_WRITE; status = encode_getfattr(&xdr, args->bitmask); out: return status; @@ -2180,9 +2189,9 @@ out: #define READ_BUF(nbytes) do { \ p = xdr_inline_decode(xdr, nbytes); \ if (unlikely(!p)) { \ - printk(KERN_INFO "%s: prematurely hit end of receive" \ + dprintk("nfs: %s: prematurely hit end of receive" \ " buffer\n", __FUNCTION__); \ - printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ + dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ __FUNCTION__, xdr->p, nbytes, xdr->end); \ return -EIO; \ } \ @@ -2223,9 +2232,8 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) READ_BUF(8); READ32(opnum); if (opnum != expected) { - printk(KERN_NOTICE - "nfs4_decode_op_hdr: Server returned operation" - " %d but we issued a request for %d\n", + dprintk("nfs: Server returned operation" + " %d but we issued a request for %d\n", opnum, expected); return -EIO; } @@ -2758,7 +2766,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf dprintk("%s: nfs_map_name_to_uid failed!\n", __FUNCTION__); } else - printk(KERN_WARNING "%s: name too long (%u)!\n", + dprintk("%s: name too long (%u)!\n", __FUNCTION__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER; } @@ -2783,7 +2791,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf dprintk("%s: nfs_map_group_to_gid failed!\n", __FUNCTION__); } else - printk(KERN_WARNING "%s: name too long (%u)!\n", + dprintk("%s: name too long (%u)!\n", __FUNCTION__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; } @@ -2950,7 +2958,8 @@ static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrl unsigned int nwords = xdr->p - savep; if (unlikely(attrwords != nwords)) { - printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n", + dprintk("%s: server returned incorrect attribute length: " + "%u %c %u\n", __FUNCTION__, attrwords << 2, (attrwords < nwords) ? '<' : '>', @@ -3451,7 +3460,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ hdrlen = (u8 *) p - (u8 *) iov->iov_base; recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { - printk(KERN_WARNING "NFS: server cheating in read reply: " + dprintk("NFS: server cheating in read reply: " "count %u > recvd %u\n", count, recvd); count = recvd; eof = 0; @@ -3500,7 +3509,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n p += 2; /* cookie */ len = ntohl(*p++); /* filename length */ if (len > NFS4_MAXNAMLEN) { - printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); + dprintk("NFS: giant filename in readdir (len 0x%x)\n", + len); goto err_unmap; } xlen = XDR_QUADLEN(len); @@ -3528,7 +3538,7 @@ short_pkt: entry[0] = entry[1] = 0; /* truncate listing ? */ if (!nr) { - printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + dprintk("NFS: readdir reply truncated!\n"); entry[1] = 1; } goto out; @@ -3554,13 +3564,13 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) READ_BUF(4); READ32(len); if (len >= rcvbuf->page_len || len <= 0) { - dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); + dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } hdrlen = (char *) xdr->p - (char *) iov->iov_base; recvd = req->rq_rcv_buf.len - hdrlen; if (recvd < len) { - printk(KERN_WARNING "NFS: server cheating in readlink reply: " + dprintk("NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } @@ -3643,7 +3653,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; recvd = req->rq_rcv_buf.len - hdrlen; if (attrlen > recvd) { - printk(KERN_WARNING "NFS: server cheating in getattr" + dprintk("NFS: server cheating in getattr" " acl reply: attrlen %u > recvd %u\n", attrlen, recvd); return -EINVAL; @@ -3688,8 +3698,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) READ_BUF(8); READ32(opnum); if (opnum != OP_SETCLIENTID) { - printk(KERN_NOTICE - "nfs4_decode_setclientid: Server returned operation" + dprintk("nfs: decode_setclientid: Server returned operation" " %d\n", opnum); return -EIO; } @@ -3783,8 +3792,13 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; - if ((status = decode_putfh(&xdr)) == 0) - status = decode_access(&xdr, res); + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_access(&xdr, res); + if (status != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 3490322d1145..e87b44ee9ac9 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -76,6 +76,7 @@ #include <linux/fs.h> #include <linux/init.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprtsock.h> #include <linux/nfs.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> @@ -491,7 +492,7 @@ static int __init root_nfs_get_handle(void) struct sockaddr_in sin; int status; int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? - IPPROTO_TCP : IPPROTO_UDP; + XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP; int version = (nfs_data.flags & NFS_MOUNT_VER3) ? NFS_MNT3_VERSION : NFS_MNT_VERSION; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 845cdde1d8b7..97669ed05500 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -476,6 +476,8 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dprintk("NFS call readdir %d\n", (unsigned int)cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_invalidate_atime(dir); + dprintk("NFS reply readdir: %d\n", status); return status; } @@ -550,6 +552,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { + nfs_invalidate_atime(data->inode); if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 @@ -576,7 +579,7 @@ static void nfs_proc_read_setup(struct nfs_read_data *data) static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (task->tk_status >= 0) - nfs_post_op_update_inode(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); return 0; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 19e05633f4e3..4587a86adaac 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -341,9 +341,6 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); nfs_mark_for_revalidate(data->inode); } - spin_lock(&data->inode->i_lock); - NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&data->inode->i_lock); return 0; } @@ -497,8 +494,7 @@ int nfs_readpage(struct file *file, struct page *page) if (ctx == NULL) goto out_unlock; } else - ctx = get_nfs_open_context((struct nfs_open_context *) - file->private_data); + ctx = get_nfs_open_context(nfs_file_open_context(file)); error = nfs_readpage_async(ctx, inode, page); @@ -576,8 +572,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (desc.ctx == NULL) return -EBADF; } else - desc.ctx = get_nfs_open_context((struct nfs_open_context *) - filp->private_data); + desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b878528b64c1..fa517ae9207f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -33,6 +33,8 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> +#include <linux/sunrpc/xprtsock.h> +#include <linux/sunrpc/xprtrdma.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> @@ -58,36 +60,6 @@ #define NFSDBG_FACILITY NFSDBG_VFS - -struct nfs_parsed_mount_data { - int flags; - int rsize, wsize; - int timeo, retrans; - int acregmin, acregmax, - acdirmin, acdirmax; - int namlen; - unsigned int bsize; - unsigned int auth_flavor_len; - rpc_authflavor_t auth_flavors[1]; - char *client_address; - - struct { - struct sockaddr_in address; - unsigned int program; - unsigned int version; - unsigned short port; - int protocol; - } mount_server; - - struct { - struct sockaddr_in address; - char *hostname; - char *export_path; - unsigned int program; - int protocol; - } nfs_server; -}; - enum { /* Mount options that take no arguments */ Opt_soft, Opt_hard, @@ -97,7 +69,7 @@ enum { Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, - Opt_udp, Opt_tcp, + Opt_udp, Opt_tcp, Opt_rdma, Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, Opt_sharecache, Opt_nosharecache, @@ -116,7 +88,7 @@ enum { /* Mount options that take string arguments */ Opt_sec, Opt_proto, Opt_mountproto, - Opt_addr, Opt_mounthost, Opt_clientaddr, + Opt_addr, Opt_mountaddr, Opt_clientaddr, /* Mount options that are ignored */ Opt_userspace, Opt_deprecated, @@ -143,6 +115,7 @@ static match_table_t nfs_mount_option_tokens = { { Opt_v3, "v3" }, { Opt_udp, "udp" }, { Opt_tcp, "tcp" }, + { Opt_rdma, "rdma" }, { Opt_acl, "acl" }, { Opt_noacl, "noacl" }, { Opt_rdirplus, "rdirplus" }, @@ -175,13 +148,14 @@ static match_table_t nfs_mount_option_tokens = { { Opt_mountproto, "mountproto=%s" }, { Opt_addr, "addr=%s" }, { Opt_clientaddr, "clientaddr=%s" }, - { Opt_mounthost, "mounthost=%s" }, + { Opt_userspace, "mounthost=%s" }, + { Opt_mountaddr, "mountaddr=%s" }, { Opt_err, NULL } }; enum { - Opt_xprt_udp, Opt_xprt_tcp, + Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma, Opt_xprt_err }; @@ -189,6 +163,7 @@ enum { static match_table_t nfs_xprt_protocol_tokens = { { Opt_xprt_udp, "udp" }, { Opt_xprt_tcp, "tcp" }, + { Opt_xprt_rdma, "rdma" }, { Opt_xprt_err, NULL } }; @@ -449,7 +424,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_INTR, ",intr", ",nointr" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, @@ -460,8 +435,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, }; const struct proc_nfs_info *nfs_infop; struct nfs_client *clp = nfss->nfs_client; - char buf[12]; - const char *proto; seq_printf(m, ",vers=%d", clp->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); @@ -480,18 +453,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_puts(m, nfs_infop->nostr); } - switch (nfss->client->cl_xprt->prot) { - case IPPROTO_TCP: - proto = "tcp"; - break; - case IPPROTO_UDP: - proto = "udp"; - break; - default: - snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); - proto = buf; - } - seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",proto=%s", + rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); seq_printf(m, ",retrans=%u", clp->retrans_count); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); @@ -506,8 +469,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) nfs_show_mount_options(m, nfss, 0); - seq_puts(m, ",addr="); - seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); + seq_printf(m, ",addr="NIPQUAD_FMT, + NIPQUAD(nfss->nfs_client->cl_addr.sin_addr)); return 0; } @@ -698,13 +661,19 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; mnt->timeo = 7; mnt->retrans = 5; break; case Opt_tcp: mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + break; + case Opt_rdma: + mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ + mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; mnt->timeo = 600; mnt->retrans = 2; break; @@ -913,13 +882,20 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; mnt->timeo = 7; mnt->retrans = 5; break; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + break; + case Opt_xprt_rdma: + /* vector side protocols to TCP */ + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; mnt->timeo = 600; mnt->retrans = 2; break; @@ -937,11 +913,12 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp: - mnt->mount_server.protocol = IPPROTO_UDP; + mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp: - mnt->mount_server.protocol = IPPROTO_TCP; + mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; break; + case Opt_xprt_rdma: /* not used for side protocols */ default: goto out_unrec_xprt; } @@ -961,7 +938,7 @@ static int nfs_parse_mount_options(char *raw, goto out_nomem; mnt->client_address = string; break; - case Opt_mounthost: + case Opt_mountaddr: string = match_strdup(args); if (string == NULL) goto out_nomem; @@ -1027,16 +1004,10 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, sin = args->mount_server.address; else sin = args->nfs_server.address; - if (args->mount_server.port == 0) { - status = rpcb_getport_sync(&sin, - args->mount_server.program, - args->mount_server.version, - args->mount_server.protocol); - if (status < 0) - goto out_err; - sin.sin_port = htons(status); - } else - sin.sin_port = htons(args->mount_server.port); + /* + * autobind will be used if mount_server.port == 0 + */ + sin.sin_port = htons(args->mount_server.port); /* * Now ask the mount server to map our export path @@ -1049,14 +1020,11 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, args->mount_server.version, args->mount_server.protocol, root_fh); - if (status < 0) - goto out_err; - - return status; + if (status == 0) + return 0; -out_err: - dfprintk(MOUNT, "NFS: unable to contact server on host " - NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr)); + dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT + ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status); return status; } @@ -1079,15 +1047,31 @@ out_err: * XXX: as far as I can tell, changing the NFS program number is not * supported in the NFS client. */ -static int nfs_validate_mount_data(struct nfs_mount_data **options, +static int nfs_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, struct nfs_fh *mntfh, const char *dev_name) { - struct nfs_mount_data *data = *options; + struct nfs_mount_data *data = (struct nfs_mount_data *)options; if (data == NULL) goto out_no_data; + memset(args, 0, sizeof(*args)); + args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); + args->rsize = NFS_MAX_FILE_IO_SIZE; + args->wsize = NFS_MAX_FILE_IO_SIZE; + args->timeo = 600; + args->retrans = 2; + args->acregmin = 3; + args->acregmax = 60; + args->acdirmin = 30; + args->acdirmax = 60; + args->mount_server.protocol = XPRT_TRANSPORT_UDP; + args->mount_server.program = NFS_MNT_PROGRAM; + args->nfs_server.protocol = XPRT_TRANSPORT_TCP; + args->nfs_server.program = NFS_PROGRAM; + switch (data->version) { case 1: data->namlen = 0; @@ -1116,92 +1100,73 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options, if (mntfh->size < sizeof(mntfh->data)) memset(mntfh->data + mntfh->size, 0, sizeof(mntfh->data) - mntfh->size); + + if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) + goto out_no_address; + + /* + * Translate to nfs_parsed_mount_data, which nfs_fill_super + * can deal with. + */ + args->flags = data->flags; + args->rsize = data->rsize; + args->wsize = data->wsize; + args->flags = data->flags; + args->timeo = data->timeo; + args->retrans = data->retrans; + args->acregmin = data->acregmin; + args->acregmax = data->acregmax; + args->acdirmin = data->acdirmin; + args->acdirmax = data->acdirmax; + args->nfs_server.address = data->addr; + if (!(data->flags & NFS_MOUNT_TCP)) + args->nfs_server.protocol = XPRT_TRANSPORT_UDP; + /* N.B. caller will free nfs_server.hostname in all cases */ + args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); + args->namlen = data->namlen; + args->bsize = data->bsize; + args->auth_flavors[0] = data->pseudoflavor; break; default: { unsigned int len; char *c; int status; - struct nfs_parsed_mount_data args = { - .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP), - .rsize = NFS_MAX_FILE_IO_SIZE, - .wsize = NFS_MAX_FILE_IO_SIZE, - .timeo = 600, - .retrans = 2, - .acregmin = 3, - .acregmax = 60, - .acdirmin = 30, - .acdirmax = 60, - .mount_server.protocol = IPPROTO_UDP, - .mount_server.program = NFS_MNT_PROGRAM, - .nfs_server.protocol = IPPROTO_TCP, - .nfs_server.program = NFS_PROGRAM, - }; - - if (nfs_parse_mount_options((char *) *options, &args) == 0) - return -EINVAL; - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return -ENOMEM; + if (nfs_parse_mount_options((char *)options, args) == 0) + return -EINVAL; - /* - * NB: after this point, caller will free "data" - * if we return an error - */ - *options = data; + if (!nfs_verify_server_address((struct sockaddr *) + &args->nfs_server.address)) + goto out_no_address; c = strchr(dev_name, ':'); if (c == NULL) return -EINVAL; len = c - dev_name; - if (len > sizeof(data->hostname)) - return -ENAMETOOLONG; - strncpy(data->hostname, dev_name, len); - args.nfs_server.hostname = data->hostname; + /* N.B. caller will free nfs_server.hostname in all cases */ + args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); c++; if (strlen(c) > NFS_MAXPATHLEN) return -ENAMETOOLONG; - args.nfs_server.export_path = c; + args->nfs_server.export_path = c; - status = nfs_try_mount(&args, mntfh); + status = nfs_try_mount(args, mntfh); if (status) return status; - /* - * Translate to nfs_mount_data, which nfs_fill_super - * can deal with. - */ - data->version = 6; - data->flags = args.flags; - data->rsize = args.rsize; - data->wsize = args.wsize; - data->timeo = args.timeo; - data->retrans = args.retrans; - data->acregmin = args.acregmin; - data->acregmax = args.acregmax; - data->acdirmin = args.acdirmin; - data->acdirmax = args.acdirmax; - data->addr = args.nfs_server.address; - data->namlen = args.namlen; - data->bsize = args.bsize; - data->pseudoflavor = args.auth_flavors[0]; - break; } } - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; + if (!(args->flags & NFS_MOUNT_SECFLAVOUR)) + args->auth_flavors[0] = RPC_AUTH_UNIX; #ifndef CONFIG_NFS_V3 - if (data->flags & NFS_MOUNT_VER3) + if (args->flags & NFS_MOUNT_VER3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ - if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) - goto out_no_address; - return 0; out_no_data: @@ -1258,7 +1223,8 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* * Finish setting up an NFS2/3 superblock */ -static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) +static void nfs_fill_super(struct super_block *sb, + struct nfs_parsed_mount_data *data) { struct nfs_server *server = NFS_SB(sb); @@ -1379,7 +1345,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh mntfh; - struct nfs_mount_data *data = raw_data; + struct nfs_parsed_mount_data data; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { @@ -1388,12 +1354,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, int error; /* Validate the mount data */ - error = nfs_validate_mount_data(&data, &mntfh, dev_name); + error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs_create_server(data, &mntfh); + server = nfs_create_server(&data, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1417,7 +1383,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - nfs_fill_super(s, data); + nfs_fill_super(s, &data); } mntroot = nfs_get_root(s, &mntfh); @@ -1432,8 +1398,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = 0; out: - if (data != raw_data) - kfree(data); + kfree(data.nfs_server.hostname); return error; out_err_nosb: @@ -1559,38 +1524,49 @@ static void nfs4_fill_super(struct super_block *sb) /* * Validate NFSv4 mount options */ -static int nfs4_validate_mount_data(struct nfs4_mount_data **options, - const char *dev_name, - struct sockaddr_in *addr, - rpc_authflavor_t *authflavour, - char **hostname, - char **mntpath, - char **ip_addr) +static int nfs4_validate_mount_data(void *options, + struct nfs_parsed_mount_data *args, + const char *dev_name) { - struct nfs4_mount_data *data = *options; + struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; if (data == NULL) goto out_no_data; + memset(args, 0, sizeof(*args)); + args->rsize = NFS_MAX_FILE_IO_SIZE; + args->wsize = NFS_MAX_FILE_IO_SIZE; + args->timeo = 600; + args->retrans = 2; + args->acregmin = 3; + args->acregmax = 60; + args->acdirmin = 30; + args->acdirmax = 60; + args->nfs_server.protocol = XPRT_TRANSPORT_TCP; + switch (data->version) { case 1: - if (data->host_addrlen != sizeof(*addr)) + if (data->host_addrlen != sizeof(args->nfs_server.address)) goto out_no_address; - if (copy_from_user(addr, data->host_addr, sizeof(*addr))) + if (copy_from_user(&args->nfs_server.address, + data->host_addr, + sizeof(args->nfs_server.address))) return -EFAULT; - if (addr->sin_port == 0) - addr->sin_port = htons(NFS_PORT); - if (!nfs_verify_server_address((struct sockaddr *) addr)) + if (args->nfs_server.address.sin_port == 0) + args->nfs_server.address.sin_port = htons(NFS_PORT); + if (!nfs_verify_server_address((struct sockaddr *) + &args->nfs_server.address)) goto out_no_address; switch (data->auth_flavourlen) { case 0: - *authflavour = RPC_AUTH_UNIX; + args->auth_flavors[0] = RPC_AUTH_UNIX; break; case 1: - if (copy_from_user(authflavour, data->auth_flavours, - sizeof(*authflavour))) + if (copy_from_user(&args->auth_flavors[0], + data->auth_flavours, + sizeof(args->auth_flavors[0]))) return -EFAULT; break; default: @@ -1600,75 +1576,57 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) return PTR_ERR(c); - *hostname = c; + args->nfs_server.hostname = c; c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); if (IS_ERR(c)) return PTR_ERR(c); - *mntpath = c; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath); + args->nfs_server.export_path = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); c = strndup_user(data->client_addr.data, 16); if (IS_ERR(c)) return PTR_ERR(c); - *ip_addr = c; + args->client_address = c; + + /* + * Translate to nfs_parsed_mount_data, which nfs4_fill_super + * can deal with. + */ + + args->flags = data->flags & NFS4_MOUNT_FLAGMASK; + args->rsize = data->rsize; + args->wsize = data->wsize; + args->timeo = data->timeo; + args->retrans = data->retrans; + args->acregmin = data->acregmin; + args->acregmax = data->acregmax; + args->acdirmin = data->acdirmin; + args->acdirmax = data->acdirmax; + args->nfs_server.protocol = data->proto; break; default: { unsigned int len; - struct nfs_parsed_mount_data args = { - .rsize = NFS_MAX_FILE_IO_SIZE, - .wsize = NFS_MAX_FILE_IO_SIZE, - .timeo = 600, - .retrans = 2, - .acregmin = 3, - .acregmax = 60, - .acdirmin = 30, - .acdirmax = 60, - .nfs_server.protocol = IPPROTO_TCP, - }; - - if (nfs_parse_mount_options((char *) *options, &args) == 0) + + if (nfs_parse_mount_options((char *)options, args) == 0) return -EINVAL; if (!nfs_verify_server_address((struct sockaddr *) - &args.nfs_server.address)) + &args->nfs_server.address)) return -EINVAL; - *addr = args.nfs_server.address; - switch (args.auth_flavor_len) { + switch (args->auth_flavor_len) { case 0: - *authflavour = RPC_AUTH_UNIX; + args->auth_flavors[0] = RPC_AUTH_UNIX; break; case 1: - *authflavour = (rpc_authflavor_t) args.auth_flavors[0]; break; default: goto out_inval_auth; } /* - * Translate to nfs4_mount_data, which nfs4_fill_super - * can deal with. - */ - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return -ENOMEM; - *options = data; - - data->version = 1; - data->flags = args.flags & NFS4_MOUNT_FLAGMASK; - data->rsize = args.rsize; - data->wsize = args.wsize; - data->timeo = args.timeo; - data->retrans = args.retrans; - data->acregmin = args.acregmin; - data->acregmax = args.acregmax; - data->acdirmin = args.acdirmin; - data->acdirmax = args.acdirmax; - data->proto = args.nfs_server.protocol; - - /* * Split "dev_name" into "hostname:mntpath". */ c = strchr(dev_name, ':'); @@ -1678,27 +1636,25 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, len = c - dev_name; if (len > NFS4_MAXNAMLEN) return -ENAMETOOLONG; - *hostname = kzalloc(len, GFP_KERNEL); - if (*hostname == NULL) + args->nfs_server.hostname = kzalloc(len, GFP_KERNEL); + if (args->nfs_server.hostname == NULL) return -ENOMEM; - strncpy(*hostname, dev_name, len - 1); + strncpy(args->nfs_server.hostname, dev_name, len - 1); c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; - *mntpath = kzalloc(len + 1, GFP_KERNEL); - if (*mntpath == NULL) + args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL); + if (args->nfs_server.export_path == NULL) return -ENOMEM; - strncpy(*mntpath, c, len); + strncpy(args->nfs_server.export_path, c, len); - dprintk("MNTPATH: %s\n", *mntpath); + dprintk("MNTPATH: %s\n", args->nfs_server.export_path); - if (args.client_address == NULL) + if (args->client_address == NULL) goto out_no_client_address; - *ip_addr = args.client_address; - break; } } @@ -1729,14 +1685,11 @@ out_no_client_address: static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs4_mount_data *data = raw_data; + struct nfs_parsed_mount_data data; struct super_block *s; struct nfs_server *server; - struct sockaddr_in addr; - rpc_authflavor_t authflavour; struct nfs_fh mntfh; struct dentry *mntroot; - char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, @@ -1744,14 +1697,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type, int error; /* Validate the mount data */ - error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour, - &hostname, &mntpath, &ip_addr); + error = nfs4_validate_mount_data(raw_data, &data, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, - authflavour, &mntfh); + server = nfs4_create_server(&data, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1790,9 +1741,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(ip_addr); - kfree(mntpath); - kfree(hostname); + kfree(data.client_address); + kfree(data.nfs_server.export_path); + kfree(data.nfs_server.hostname); return error; out_free: diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 045ab805c17f..1aed850d18f2 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -66,7 +66,6 @@ static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) .rpc_cred = data->cred, }; - nfs_begin_data_update(dir); NFS_PROTO(dir)->unlink_setup(&msg, dir); rpc_call_setup(task, &msg, 0); } @@ -84,8 +83,6 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) if (!NFS_PROTO(dir)->unlink_done(task, dir)) rpc_restart_call(task); - else - nfs_end_data_update(dir); } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0d7a77cc394b..e2bb66c34406 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -110,6 +110,13 @@ void nfs_writedata_release(void *wdata) nfs_writedata_free(wdata); } +static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) +{ + ctx->error = error; + smp_wmb(); + set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); +} + static struct nfs_page *nfs_page_find_request_locked(struct page *page) { struct nfs_page *req = NULL; @@ -243,10 +250,7 @@ static void nfs_end_page_writeback(struct page *page) /* * Find an associated nfs write request, and prepare to flush it out - * Returns 1 if there was no write request, or if the request was - * already tagged by nfs_set_page_dirty.Returns 0 if the request - * was not tagged. - * May also return an error if the user signalled nfs_wait_on_request(). + * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) @@ -261,7 +265,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, req = nfs_page_find_request_locked(page); if (req == NULL) { spin_unlock(&inode->i_lock); - return 1; + return 0; } if (nfs_lock_request_dontget(req)) break; @@ -282,7 +286,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, spin_unlock(&inode->i_lock); nfs_unlock_request(req); nfs_pageio_complete(pgio); - return 1; + return 0; } if (nfs_set_page_writeback(page) != 0) { spin_unlock(&inode->i_lock); @@ -290,70 +294,56 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); - return ret; + return 0; } -/* - * Write an mmapped page to the server. - */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - struct nfs_pageio_descriptor mypgio, *pgio; - struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; - unsigned offset; - int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - if (wbc->for_writepages) - pgio = wbc->fs_private; - else { - nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc)); - pgio = &mypgio; - } - nfs_pageio_cond_complete(pgio, page->index); + return nfs_page_async_flush(pgio, page); +} - err = nfs_page_async_flush(pgio, page); - if (err <= 0) - goto out; - err = 0; - offset = nfs_page_length(page); - if (!offset) - goto out; - - nfs_pageio_cond_complete(pgio, page->index); +/* + * Write an mmapped page to the server. + */ +static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +{ + struct nfs_pageio_descriptor pgio; + int err; - ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); - if (ctx == NULL) { - err = -EBADF; - goto out; - } - err = nfs_writepage_setup(ctx, page, 0, offset); - put_nfs_open_context(ctx); - if (err != 0) - goto out; - err = nfs_page_async_flush(pgio, page); - if (err > 0) - err = 0; -out: - if (!wbc->for_writepages) - nfs_pageio_complete(pgio); - return err; + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); + err = nfs_do_writepage(page, wbc, &pgio); + nfs_pageio_complete(&pgio); + if (err < 0) + return err; + if (pgio.pg_error < 0) + return pgio.pg_error; + return 0; } int nfs_writepage(struct page *page, struct writeback_control *wbc) { - int err; + int ret; + + ret = nfs_writepage_locked(page, wbc); + unlock_page(page); + return ret; +} + +static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) +{ + int ret; - err = nfs_writepage_locked(page, wbc); + ret = nfs_do_writepage(page, wbc, data); unlock_page(page); - return err; + return ret; } int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) @@ -365,12 +355,11 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); - wbc->fs_private = &pgio; - err = generic_writepages(mapping, wbc); + err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); - if (err) + if (err < 0) return err; - if (pgio.pg_error) + if (pgio.pg_error < 0) return pgio.pg_error; return 0; } @@ -389,14 +378,11 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) return error; if (!nfsi->npages) { igrab(inode); - nfs_begin_data_update(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); - if (PageDirty(req->wb_page)) - set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; kref_get(&req->wb_kref); return 0; @@ -416,12 +402,9 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); - if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) - __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&inode->i_lock); - nfs_end_data_update(inode); iput(inode); } else spin_unlock(&inode->i_lock); @@ -682,7 +665,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, int nfs_flush_incompatible(struct file *file, struct page *page) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_page *req; int do_flush, status; /* @@ -716,7 +699,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = page->mapping->host; int status = 0; @@ -967,7 +950,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto out; } @@ -1030,7 +1013,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto remove_request; } @@ -1244,7 +1227,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; @@ -1347,53 +1330,52 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) +static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) { - struct address_space *mapping = inode->i_mapping; - struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_writepages = 1, - .range_cyclic = 1, - }; int ret; - ret = nfs_writepages(mapping, &wbc); + ret = nfs_writepages(mapping, wbc); if (ret < 0) goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, 0); - if (ret >= 0) - return 0; + ret = nfs_sync_mapping_wait(mapping, wbc, how); + if (ret < 0) + goto out; + return 0; out: __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return ret; } -int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) +/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ +static int nfs_write_mapping(struct address_space *mapping, int how) { struct writeback_control wbc = { .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, + .sync_mode = WB_SYNC_NONE, .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, .for_writepages = 1, + .range_cyclic = 1, }; int ret; - ret = nfs_writepages(mapping, &wbc); + ret = __nfs_write_mapping(mapping, &wbc, how); if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, how); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; + return ret; + wbc.sync_mode = WB_SYNC_ALL; + return __nfs_write_mapping(mapping, &wbc, how); +} + +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) +{ + return nfs_write_mapping(inode->i_mapping, 0); +} + +int nfs_wb_nocommit(struct inode *inode) +{ + return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) @@ -1477,35 +1459,6 @@ int nfs_wb_page(struct inode *inode, struct page* page) return nfs_wb_page_priority(inode, page, FLUSH_STABLE); } -int nfs_set_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct inode *inode; - struct nfs_page *req; - int ret; - - if (!mapping) - goto out_raced; - inode = mapping->host; - if (!inode) - goto out_raced; - spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(page); - if (req != NULL) { - /* Mark any existing write requests for flushing */ - ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(&inode->i_lock); - nfs_release_request(req); - return ret; - } - ret = __set_page_dirty_nobuffers(page); - spin_unlock(&inode->i_lock); - return ret; -out_raced: - return !TestSetPageDirty(page); -} - - int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e15f2cf8ac15..57333944af7f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -102,7 +102,8 @@ check_filename(char *str, int len, __be32 err) out: \ return status; \ xdr_error: \ - printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ status = nfserr_bad_xdr; \ goto out @@ -124,7 +125,8 @@ xdr_error: \ if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ savemem(argp, p, nbytes) : \ (char *)p)) { \ - printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ goto xdr_error; \ } \ p += XDR_QUADLEN(nbytes); \ @@ -140,7 +142,8 @@ xdr_error: \ p = argp->p; \ argp->p += XDR_QUADLEN(nbytes); \ } else if (!(p = read_buf(argp, nbytes))) { \ - printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ goto xdr_error; \ } \ } while (0) @@ -948,7 +951,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) */ avail = (char*)argp->end - (char*)argp->p; if (avail + argp->pagelen < write->wr_buflen) { - printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); + dprintk("NFSD: xdr error (%s:%d)\n", + __FILE__, __LINE__); goto xdr_error; } argp->rqstp->rq_vec[0].iov_base = p; @@ -1019,7 +1023,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; - printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n"); + dprintk("nfsd: couldn't allocate room for COMPOUND\n"); goto xdr_error; } } @@ -1326,7 +1330,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * path = exp->ex_path; if (strncmp(path, rootpath, strlen(rootpath))) { - printk("nfsd: fs_locations failed;" + dprintk("nfsd: fs_locations failed;" "%s is not contained in %s\n", path, rootpath); *stat = nfserr_notsupp; return NULL; diff --git a/include/asm-blackfin/mach-bf548/bf54x_keys.h b/include/asm-blackfin/mach-bf548/bf54x_keys.h new file mode 100644 index 000000000000..1fb4ec77cc25 --- /dev/null +++ b/include/asm-blackfin/mach-bf548/bf54x_keys.h @@ -0,0 +1,17 @@ +#ifndef _BFIN_KPAD_H +#define _BFIN_KPAD_H + +struct bfin_kpad_platform_data { + int rows; + int cols; + const unsigned int *keymap; + unsigned short keymapsize; + unsigned short repeat; + u32 debounce_time; /* in ns */ + u32 coldrive_time; /* in ns */ + u32 keyup_test_interval; /* in ms */ +}; + +#define KEYVAL(col, row, val) (((1 << col) << 24) | ((1 << row) << 16) | (val)) + +#endif diff --git a/include/asm-mips/mach-au1x00/prom.h b/include/asm-mips/mach-au1x00/prom.h new file mode 100644 index 000000000000..e38715577c51 --- /dev/null +++ b/include/asm-mips/mach-au1x00/prom.h @@ -0,0 +1,13 @@ +#ifndef __AU1X00_PROM_H +#define __AU1X00_PROM_H + +extern int prom_argc; +extern char **prom_argv; +extern char **prom_envp; + +extern void prom_init_cmdline(void); +extern char *prom_getcmdline(void); +extern char *prom_getenv(char *envname); +extern int prom_get_ethernet_addr(char *ethernet_addr); + +#endif diff --git a/include/asm-powerpc/dcr-mmio.h b/include/asm-powerpc/dcr-mmio.h index 6b82c3ba495a..08532ff1899c 100644 --- a/include/asm-powerpc/dcr-mmio.h +++ b/include/asm-powerpc/dcr-mmio.h @@ -33,16 +33,16 @@ typedef struct { extern dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n, unsigned int dcr_c); -extern void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c); +extern void dcr_unmap(dcr_host_t host, unsigned int dcr_c); static inline u32 dcr_read(dcr_host_t host, unsigned int dcr_n) { - return in_be32(host.token + dcr_n * host.stride); + return in_be32(host.token + ((host.base + dcr_n) * host.stride)); } static inline void dcr_write(dcr_host_t host, unsigned int dcr_n, u32 value) { - out_be32(host.token + dcr_n * host.stride, value); + out_be32(host.token + ((host.base + dcr_n) * host.stride), value); } extern u64 of_translate_dcr_address(struct device_node *dev, diff --git a/include/asm-powerpc/dcr-native.h b/include/asm-powerpc/dcr-native.h index f41058c0f6cb..8dbb1ab0aa04 100644 --- a/include/asm-powerpc/dcr-native.h +++ b/include/asm-powerpc/dcr-native.h @@ -29,9 +29,9 @@ typedef struct { #define DCR_MAP_OK(host) (1) #define dcr_map(dev, dcr_n, dcr_c) ((dcr_host_t){ .base = (dcr_n) }) -#define dcr_unmap(host, dcr_n, dcr_c) do {} while (0) -#define dcr_read(host, dcr_n) mfdcr(dcr_n) -#define dcr_write(host, dcr_n, value) mtdcr(dcr_n, value) +#define dcr_unmap(host, dcr_c) do {} while (0) +#define dcr_read(host, dcr_n) mfdcr(dcr_n + host.base) +#define dcr_write(host, dcr_n, value) mtdcr(dcr_n + host.base, value) /* Device Control Registers */ void __mtdcr(int reg, unsigned int val); diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h index eff8585cb741..d058b04e0083 100644 --- a/include/asm-x86/irqflags_32.h +++ b/include/asm-x86/irqflags_32.h @@ -160,4 +160,17 @@ static inline int raw_irqs_disabled(void) # define TRACE_IRQS_OFF #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call lockdep_sys_exit; \ + popl %edx; \ + popl %ecx; \ + popl %eax; +#else +# define LOCKDEP_SYS_EXIT +#endif + #endif diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h index 86e70fe23659..5341ea1f815a 100644 --- a/include/asm-x86/irqflags_64.h +++ b/include/asm-x86/irqflags_64.h @@ -137,6 +137,20 @@ static inline void halt(void) # define TRACE_IRQS_ON # define TRACE_IRQS_OFF # endif +# ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +# define LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + SAVE_REST; \ + LOCKDEP_SYS_EXIT; \ + RESTORE_REST; \ + cli; \ + TRACE_IRQS_OFF; +# else +# define LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ +# endif #endif #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 16421f662a7a..6d760f1ad875 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1302,8 +1302,14 @@ struct file_system_type { struct module *owner; struct file_system_type * next; struct list_head fs_supers; + struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; + + struct lock_class_key i_lock_key; + struct lock_class_key i_mutex_key; + struct lock_class_key i_mutex_dir_key; + struct lock_class_key i_alloc_sem_key; }; extern int get_sb_bdev(struct file_system_type *fs_type, diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index 265d17830a0f..c6d3a9de5634 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -8,6 +8,7 @@ struct gpio_keys_button { int active_low; char *desc; int type; /* input event type (EV_KEY, EV_SW) */ + int wakeup; /* configure the button as a wake-up source */ }; struct gpio_keys_platform_data { diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 99e3a1a00099..58e43e566457 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -107,7 +107,7 @@ struct __fdb_entry extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb); -extern int (*br_should_route_hook)(struct sk_buff **pskb); +extern int (*br_should_route_hook)(struct sk_buff *skb); #endif diff --git a/include/linux/input.h b/include/linux/input.h index 52d1bd434a50..f30da6fc08e3 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -856,7 +856,7 @@ struct ff_rumble_effect { * defining effect parameters * * This structure is sent through ioctl from the application to the driver. - * To create a new effect aplication should set its @id to -1; the kernel + * To create a new effect application should set its @id to -1; the kernel * will return assigned @id which can later be used to update or delete * this effect. * @@ -936,9 +936,82 @@ struct ff_effect { #define BIT(x) (1UL<<((x)%BITS_PER_LONG)) #define LONG(x) ((x)/BITS_PER_LONG) +/** + * struct input_dev - represents an input device + * @name: name of the device + * @phys: physical path to the device in the system hierarchy + * @uniq: unique identification code for the device (if device has it) + * @id: id of the device (struct input_id) + * @evbit: bitmap of types of events supported by the device (EV_KEY, + * EV_REL, etc.) + * @keybit: bitmap of keys/buttons this device has + * @relbit: bitmap of relative axes for the device + * @absbit: bitmap of absolute axes for the device + * @mscbit: bitmap of miscellaneous events supported by the device + * @ledbit: bitmap of leds present on the device + * @sndbit: bitmap of sound effects supported by the device + * @ffbit: bitmap of force feedback effects supported by the device + * @swbit: bitmap of switches present on the device + * @keycodemax: size of keycode table + * @keycodesize: size of elements in keycode table + * @keycode: map of scancodes to keycodes for this device + * @setkeycode: optional method to alter current keymap, used to implement + * sparse keymaps. If not supplied default mechanism will be used + * @getkeycode: optional method to retrieve current keymap. If not supplied + * default mechanism will be used + * @ff: force feedback structure associated with the device if device + * supports force feedback effects + * @repeat_key: stores key code of the last key pressed; used to implement + * software autorepeat + * @timer: timer for software autorepeat + * @sync: set to 1 when there were no new events since last EV_SYNC + * @abs: current values for reports from absolute axes + * @rep: current values for autorepeat parameters (delay, rate) + * @key: reflects current state of device's keys/buttons + * @led: reflects current state of device's LEDs + * @snd: reflects current state of sound effects + * @sw: reflects current state of device's switches + * @absmax: maximum values for events coming from absolute axes + * @absmin: minimum values for events coming from absolute axes + * @absfuzz: describes noisiness for axes + * @absflat: size of the center flat position (used by joydev) + * @open: this method is called when the very first user calls + * input_open_device(). The driver must prepare the device + * to start generating events (start polling thread, + * request an IRQ, submit URB, etc.) + * @close: this method is called when the very last user calls + * input_close_device(). + * @flush: purges the device. Most commonly used to get rid of force + * feedback effects loaded into the device when disconnecting + * from it + * @event: event handler for events sent _to_ the device, like EV_LED + * or EV_SND. The device is expected to carry out the requested + * action (turn on a LED, play sound, etc.) The call is protected + * by @event_lock and must not sleep + * @grab: input handle that currently has the device grabbed (via + * EVIOCGRAB ioctl). When a handle grabs a device it becomes sole + * recipient for all input events coming from the device + * @event_lock: this spinlock is is taken when input core receives + * and processes a new event for the device (in input_event()). + * Code that accesses and/or modifies parameters of a device + * (such as keymap or absmin, absmax, absfuzz, etc.) after device + * has been registered with input core must take this lock. + * @mutex: serializes calls to open(), close() and flush() methods + * @users: stores number of users (input handlers) that opened this + * device. It is used by input_open_device() and input_close_device() + * to make sure that dev->open() is only called when the first + * user opens device and dev->close() is called when the very + * last user closes the device + * @going_away: marks devices that are in a middle of unregistering and + * causes input_open_device*() fail with -ENODEV. + * @dev: driver model's view of this device + * @h_list: list of input handles associated with the device. When + * accessing the list dev->mutex must be held + * @node: used to place the device onto input_dev_list + */ struct input_dev { - void *private; + void *private; /* do not use */ const char *name; const char *phys; @@ -966,8 +1039,6 @@ struct input_dev { unsigned int repeat_key; struct timer_list timer; - int state; - int sync; int abs[ABS_MAX + 1]; @@ -990,8 +1061,11 @@ struct input_dev { struct input_handle *grab; - struct mutex mutex; /* serializes open and close operations */ + spinlock_t event_lock; + struct mutex mutex; + unsigned int users; + int going_away; struct device dev; union { /* temporarily so while we switching to struct device */ @@ -1057,7 +1131,9 @@ struct input_handle; /** * struct input_handler - implements one of interfaces for input devices * @private: driver-specific data - * @event: event handler + * @event: event handler. This method is being called by input core with + * interrupts disabled and dev->event_lock spinlock held and so + * it may not sleep * @connect: called when attaching a handler to an input device * @disconnect: disconnects a handler from input device * @start: starts handler for given handle. This function is called by @@ -1069,10 +1145,18 @@ struct input_handle; * @name: name of the handler, to be shown in /proc/bus/input/handlers * @id_table: pointer to a table of input_device_ids this driver can * handle - * @blacklist: prointer to a table of input_device_ids this driver should + * @blacklist: pointer to a table of input_device_ids this driver should * ignore even if they match @id_table * @h_list: list of input handles associated with the handler * @node: for placing the driver onto input_handler_list + * + * Input handlers attach to input devices and create input handles. There + * are likely several handlers attached to any given input device at the + * same time. All of them will get their copy of input event generated by + * the device. + * + * Note that input core serializes calls to connect() and disconnect() + * methods. */ struct input_handler { @@ -1094,6 +1178,18 @@ struct input_handler { struct list_head node; }; +/** + * struct input_handle - links input device with an input handler + * @private: handler-specific data + * @open: counter showing whether the handle is 'open', i.e. should deliver + * events from its device + * @name: name given to the handle by handler that created it + * @dev: input device the handle is attached to + * @handler: handler that works with the device through this handle + * @d_node: used to put the handle on device's list of attached handles + * @h_node: used to put the handle on handler's list of handles from which + * it gets events + */ struct input_handle { void *private; @@ -1136,10 +1232,10 @@ static inline void input_set_drvdata(struct input_dev *dev, void *data) dev->private = data; } -int input_register_device(struct input_dev *); +int __must_check input_register_device(struct input_dev *); void input_unregister_device(struct input_dev *); -int input_register_handler(struct input_handler *); +int __must_check input_register_handler(struct input_handler *); void input_unregister_handler(struct input_handler *); int input_register_handle(struct input_handle *); @@ -1216,7 +1312,7 @@ extern struct class input_class; * @max_effects: maximum number of effects supported by device * @effects: pointer to an array of effects currently loaded into device * @effect_owners: array of effect owners; when file handle owning - * an effect gets closed the effcet is automatically erased + * an effect gets closed the effect is automatically erased * * Every force-feedback device must implement upload() and playback() * methods; erase() is optional. set_gain() and set_autocenter() need diff --git a/include/linux/isdn.h b/include/linux/isdn.h index ad09506554a3..d5dda4b643ac 100644 --- a/include/linux/isdn.h +++ b/include/linux/isdn.h @@ -286,7 +286,6 @@ typedef struct { /* Local interface-data */ typedef struct isdn_net_local_s { ulong magic; - char name[10]; /* Name of device */ struct net_device_stats stats; /* Ethernet Statistics */ int isdn_device; /* Index to isdn-device */ int isdn_channel; /* Index to isdn-channel */ diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 452737551260..700a93b79189 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -30,6 +30,7 @@ #include <linux/bit_spinlock.h> #include <linux/mutex.h> #include <linux/timer.h> +#include <linux/lockdep.h> #include <asm/semaphore.h> #endif @@ -396,6 +397,10 @@ struct handle_s unsigned int h_sync: 1; /* sync-on-close */ unsigned int h_jdata: 1; /* force data journaling */ unsigned int h_aborted: 1; /* fatal error on handle */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map h_lockdep_map; +#endif }; diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index d7a5e034c3a2..e757a74b9d17 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -109,6 +109,10 @@ static inline u64 get_jiffies_64(void) ((long)(a) - (long)(b) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) +#define time_in_range(a,b,c) \ + (time_after_eq(a,b) && \ + time_before_eq(a,c)) + /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64() */ diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h index 7ddbc30aa8e7..33b5c2e325b9 100644 --- a/include/linux/keyboard.h +++ b/include/linux/keyboard.h @@ -416,6 +416,7 @@ extern unsigned short plain_map[NR_KEYS]; #define K_SHIFTRLOCK K(KT_LOCK,KG_SHIFTR) #define K_CTRLLLOCK K(KT_LOCK,KG_CTRLL) #define K_CTRLRLOCK K(KT_LOCK,KG_CTRLR) +#define K_CAPSSHIFTLOCK K(KT_LOCK,KG_CAPSSHIFT) #define K_SHIFT_SLOCK K(KT_SLOCK,KG_SHIFT) #define K_CTRL_SLOCK K(KT_SLOCK,KG_CTRL) @@ -425,8 +426,9 @@ extern unsigned short plain_map[NR_KEYS]; #define K_SHIFTR_SLOCK K(KT_SLOCK,KG_SHIFTR) #define K_CTRLL_SLOCK K(KT_SLOCK,KG_CTRLL) #define K_CTRLR_SLOCK K(KT_SLOCK,KG_CTRLR) +#define K_CAPSSHIFT_SLOCK K(KT_SLOCK,KG_CAPSSHIFT) -#define NR_LOCK 8 +#define NR_LOCK 9 #define K_BRL_BLANK K(KT_BRL, 0) #define K_BRL_DOT1 K(KT_BRL, 1) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0e843bf65877..f6279f68a827 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -238,6 +238,7 @@ extern void lockdep_info(void); extern void lockdep_reset(void); extern void lockdep_reset_lock(struct lockdep_map *lock); extern void lockdep_free_key_range(void *start, unsigned long size); +extern void lockdep_sys_exit(void); extern void lockdep_off(void); extern void lockdep_on(void); @@ -252,6 +253,13 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass); /* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. + */ +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), } + +/* * Reinitialize a lock key - for cases where there is special locking or * special initialization of locks so that the validator gets the scope * of dependencies wrong: they are either too broad (they need a class-split) @@ -317,6 +325,7 @@ static inline void lockdep_on(void) # define INIT_LOCKDEP # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) +# define lockdep_sys_exit() do { } while (0) /* * The class key takes no space if lockdep is disabled: */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 0d50ea3df689..6a735c72f23f 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -120,14 +120,17 @@ static inline int fastcall mutex_is_locked(struct mutex *lock) * See kernel/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. */ -extern void fastcall mutex_lock(struct mutex *lock); -extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock); - #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); + +#define mutex_lock(lock) mutex_lock_nested(lock, 0) +#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) #else +extern void fastcall mutex_lock(struct mutex *lock); +extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock); + # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) #endif diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 1dd075eda595..16adac688af5 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -51,7 +51,7 @@ struct sk_buff; struct net_device; typedef unsigned int nf_hookfn(unsigned int hooknum, - struct sk_buff **skb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)); @@ -183,7 +183,7 @@ void nf_log_packet(int pf, struct nf_loginfo *li, const char *fmt, ...); -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); @@ -195,7 +195,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(int pf, unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh, @@ -207,14 +207,14 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, if (list_empty(&nf_hooks[pf][hook])) return 1; #endif - return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh); + return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1); + return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, 1); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -241,13 +241,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\ __ret = (okfn)(skb); \ __ret;}) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ __ret = (okfn)(skb); \ __ret;}) @@ -287,7 +287,7 @@ extern void nf_invalidate_cache(int pf); /* Call this before modifying an existing packet: ensures it is modifiable and linear to the point you care about (writable_len). Returns true or false. */ -extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len); static inline void nf_csum_replace4(__sum16 *sum, __be32 from, __be32 to) { @@ -317,7 +317,7 @@ struct nf_afinfo { unsigned int dataoff, u_int8_t protocol); void (*saveroute)(const struct sk_buff *skb, struct nf_info *info); - int (*reroute)(struct sk_buff **skb, + int (*reroute)(struct sk_buff *skb, const struct nf_info *info); int route_key_size; }; @@ -371,15 +371,15 @@ extern struct proc_dir_entry *proc_net_netfilter; #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) static inline int nf_hook_thresh(int pf, unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh, int cond) { - return okfn(*pskb); + return okfn(skb); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h index 26c223544ae8..0bb5a6976bf3 100644 --- a/include/linux/netfilter/nf_conntrack_amanda.h +++ b/include/linux/netfilter/nf_conntrack_amanda.h @@ -2,7 +2,7 @@ #define _NF_CONNTRACK_AMANDA_H /* AMANDA tracking. */ -extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb, +extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h index b7c360ffd0d0..47727d7546ea 100644 --- a/include/linux/netfilter/nf_conntrack_ftp.h +++ b/include/linux/netfilter/nf_conntrack_ftp.h @@ -32,7 +32,7 @@ struct nf_conntrack_expect; /* For NAT to hook in when we find a packet which describes what other * connection we should expect. */ -extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, +extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h index 08e2f4977c2e..aabd24ac7631 100644 --- a/include/linux/netfilter/nf_conntrack_h323.h +++ b/include/linux/netfilter/nf_conntrack_h323.h @@ -36,27 +36,27 @@ extern void nf_conntrack_h245_expect(struct nf_conn *new, struct nf_conntrack_expect *this); extern void nf_conntrack_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this); -extern int (*set_h245_addr_hook) (struct sk_buff **pskb, +extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port); -extern int (*set_h225_addr_hook) (struct sk_buff **pskb, +extern int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port); -extern int (*set_sig_addr_hook) (struct sk_buff **pskb, +extern int (*set_sig_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count); -extern int (*set_ras_addr_hook) (struct sk_buff **pskb, +extern int (*set_ras_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count); -extern int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, +extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -64,24 +64,24 @@ extern int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp); -extern int (*nat_t120_hook) (struct sk_buff **pskb, struct nf_conn *ct, +extern int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp); -extern int (*nat_h245_hook) (struct sk_buff **pskb, struct nf_conn *ct, +extern int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp); -extern int (*nat_callforwarding_hook) (struct sk_buff **pskb, +extern int (*nat_callforwarding_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp); -extern int (*nat_q931_hook) (struct sk_buff **pskb, struct nf_conn *ct, +extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h index 2ab6b8255911..36282bf71b63 100644 --- a/include/linux/netfilter/nf_conntrack_irc.h +++ b/include/linux/netfilter/nf_conntrack_irc.h @@ -5,7 +5,7 @@ #define IRC_PORT 6667 -extern unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb, +extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index c93061f33144..23435496d24a 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -301,13 +301,13 @@ struct nf_conn; struct nf_conntrack_expect; extern int -(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); extern int -(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index bb7f2041db74..9fff19779bd5 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -21,11 +21,11 @@ enum sip_header_pos { POS_SDP_HEADER, }; -extern unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb, +extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr); -extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb, +extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr); diff --git a/include/linux/netfilter/nf_conntrack_tftp.h b/include/linux/netfilter/nf_conntrack_tftp.h index 0d79b7ae051f..c78d38fdb050 100644 --- a/include/linux/netfilter/nf_conntrack_tftp.h +++ b/include/linux/netfilter/nf_conntrack_tftp.h @@ -13,7 +13,7 @@ struct tftphdr { #define TFTP_OPCODE_ACK 4 #define TFTP_OPCODE_ERROR 5 -extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, +extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp); diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 64f425a855bb..03e6ce979eaa 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -191,7 +191,7 @@ struct xt_target /* Returns verdict. Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ - unsigned int (*target)(struct sk_buff **pskb, + unsigned int (*target)(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 584cd1b18f12..2fc73fa8e37f 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -287,7 +287,7 @@ struct arpt_error extern int arpt_register_table(struct arpt_table *table, const struct arpt_replace *repl); extern void arpt_unregister_table(struct arpt_table *table); -extern unsigned int arpt_do_table(struct sk_buff **pskb, +extern unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 94e0a7dc0cb2..892f5b7771c7 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -237,7 +237,7 @@ struct ebt_target struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; /* returns one of the standard verdicts */ - int (*target)(struct sk_buff **pskb, unsigned int hooknr, + int (*target)(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); /* 0 == let it in */ @@ -294,7 +294,7 @@ extern int ebt_register_watcher(struct ebt_watcher *watcher); extern void ebt_unregister_watcher(struct ebt_watcher *watcher); extern int ebt_register_target(struct ebt_target *target); extern void ebt_unregister_target(struct ebt_target *target); -extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff **pskb, +extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index ceae87a4c891..1a63adf5c4c1 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -75,8 +75,8 @@ enum nf_ip_hook_priorities { #define SO_ORIGINAL_DST 80 #ifdef __KERNEL__ -extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type); -extern int ip_xfrm_me_harder(struct sk_buff **pskb); +extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); +extern int ip_xfrm_me_harder(struct sk_buff *skb); extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index e992cd6b28f5..d79ed69cbc1f 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -337,7 +337,7 @@ struct ipt_error .target.errorname = "ERROR", \ } -extern unsigned int ipt_do_table(struct sk_buff **pskb, +extern unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 9a720f05888f..7dc481ce7cba 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -336,7 +336,7 @@ extern void ip6t_init(void) __init; extern int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct xt_table *table); -extern unsigned int ip6t_do_table(struct sk_buff **pskb, +extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7250eeadd7b5..c5164c257f71 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -47,10 +47,8 @@ #include <linux/nfs3.h> #include <linux/nfs4.h> #include <linux/nfs_xdr.h> - #include <linux/nfs_fs_sb.h> -#include <linux/rwsem.h> #include <linux/mempool.h> /* @@ -77,6 +75,9 @@ struct nfs_open_context { struct nfs4_state *state; fl_owner_t lockowner; int mode; + + unsigned long flags; +#define NFS_CONTEXT_ERROR_WRITE (0) int error; struct list_head list; @@ -133,11 +134,6 @@ struct nfs_inode { * server. */ unsigned long cache_change_attribute; - /* - * Counter indicating the number of outstanding requests that - * will cause a file data update. - */ - atomic_t data_updates; struct rb_root access_cache; struct list_head access_cache_entry_lru; @@ -205,27 +201,18 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) -#define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) -#define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) -#define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo) #define NFS_MINATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \ : NFS_SERVER(inode)->acregmin) #define NFS_MAXATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \ : NFS_SERVER(inode)->acregmax) -#define NFS_ATTRTIMEO_UPDATE(inode) (NFS_I(inode)->attrtimeo_timestamp) #define NFS_FLAGS(inode) (NFS_I(inode)->flags) #define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode))) #define NFS_FILEID(inode) (NFS_I(inode)->fileid) -static inline int nfs_caches_unstable(struct inode *inode) -{ - return atomic_read(&NFS_I(inode)->data_updates) != 0; -} - static inline void nfs_mark_for_revalidate(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); @@ -237,12 +224,6 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) spin_unlock(&inode->i_lock); } -static inline void NFS_CACHEINV(struct inode *inode) -{ - if (!nfs_caches_unstable(inode)) - nfs_mark_for_revalidate(inode); -} - static inline int nfs_server_capable(struct inode *inode, int cap) { return NFS_SERVER(inode)->caps & cap; @@ -253,28 +234,33 @@ static inline int NFS_USE_READDIRPLUS(struct inode *inode) return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); } +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_time = verf; +} + /** * nfs_save_change_attribute - Returns the inode attribute change cookie - * @inode - pointer to inode + * @dir - pointer to parent directory inode * The "change attribute" is updated every time we finish an operation * that will result in a metadata change on the server. */ -static inline long nfs_save_change_attribute(struct inode *inode) +static inline unsigned long nfs_save_change_attribute(struct inode *dir) { - return NFS_I(inode)->cache_change_attribute; + return NFS_I(dir)->cache_change_attribute; } /** - * nfs_verify_change_attribute - Detects NFS inode cache updates - * @inode - pointer to inode + * nfs_verify_change_attribute - Detects NFS remote directory changes + * @dir - pointer to parent directory inode * @chattr - previously saved change attribute - * Return "false" if metadata has been updated (or is in the process of - * being updated) since the change attribute was saved. + * Return "false" if the verifiers doesn't match the change attribute. + * This would usually indicate that the directory contents have changed on + * the server, and that any dentries need revalidating. */ -static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) +static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr) { - return !nfs_caches_unstable(inode) - && time_after_eq(chattr, NFS_I(inode)->cache_change_attribute); + return chattr == NFS_I(dir)->cache_change_attribute; } /* @@ -283,15 +269,14 @@ static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long extern int nfs_sync_mapping(struct address_space *mapping); extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping); extern void nfs_zap_caches(struct inode *); +extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); +extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); -extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); -extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); -extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); @@ -301,13 +286,10 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); -extern void nfs_begin_attr_update(struct inode *); -extern void nfs_end_attr_update(struct inode *); -extern void nfs_begin_data_update(struct inode *); -extern void nfs_end_data_update(struct inode *); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); +extern u64 nfs_compat_user_ino64(u64 fileid); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ @@ -328,14 +310,15 @@ extern const struct inode_operations nfs3_file_inode_operations; extern const struct file_operations nfs_file_operations; extern const struct address_space_operations nfs_file_aops; -static inline struct rpc_cred *nfs_file_cred(struct file *file) +static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) { - if (file != NULL) { - struct nfs_open_context *ctx; + return filp->private_data; +} - ctx = (struct nfs_open_context*)file->private_data; - return ctx->cred; - } +static inline struct rpc_cred *nfs_file_cred(struct file *file) +{ + if (file != NULL) + return nfs_file_open_context(file)->cred; return NULL; } @@ -378,6 +361,8 @@ extern const struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); +extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags); +extern void nfs_access_zap_cache(struct inode *inode); /* * linux/fs/nfs/symlink.c @@ -420,15 +405,14 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); -extern int nfs_set_page_dirty(struct page *); /* * Try to write back everything synchronously (but check the * return value!) */ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); -extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); extern int nfs_wb_all(struct inode *inode); +extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 78e60798d10e..30dbcc185e69 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -30,7 +30,6 @@ #define PG_BUSY 0 #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 -#define PG_NEED_FLUSH 3 struct nfs_inode; struct nfs_page { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cf74a4db84a5..daab252f2e5c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -62,7 +62,8 @@ struct nfs_fattr { #define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ #define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ -#define NFS_ATTR_FATTR_V4_REFERRAL 0x0010 /* NFSv4 referral */ +#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */ +#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */ /* * Info on the file system @@ -538,10 +539,13 @@ typedef u64 clientid4; struct nfs4_accessargs { const struct nfs_fh * fh; + const u32 * bitmask; u32 access; }; struct nfs4_accessres { + const struct nfs_server * server; + struct nfs_fattr * fattr; u32 supported; u32 access; }; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index fe17d7d750c2..76c1a530edc5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,6 +41,7 @@ #include <linux/percpu.h> #include <linux/cpumask.h> #include <linux/seqlock.h> +#include <linux/lockdep.h> /** * struct rcu_head - callback structure for use with RCU @@ -133,6 +134,15 @@ static inline void rcu_bh_qsctr_inc(int cpu) extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * @@ -166,6 +176,7 @@ extern int rcu_needs_cpu(int cpu); do { \ preempt_disable(); \ __acquire(RCU); \ + rcu_read_acquire(); \ } while(0) /** @@ -175,6 +186,7 @@ extern int rcu_needs_cpu(int cpu); */ #define rcu_read_unlock() \ do { \ + rcu_read_release(); \ __release(RCU); \ preempt_enable(); \ } while(0) @@ -204,6 +216,7 @@ extern int rcu_needs_cpu(int cpu); do { \ local_bh_disable(); \ __acquire(RCU_BH); \ + rcu_read_acquire(); \ } while(0) /* @@ -213,6 +226,7 @@ extern int rcu_needs_cpu(int cpu); */ #define rcu_read_unlock_bh() \ do { \ + rcu_read_release(); \ __release(RCU_BH); \ local_bh_enable(); \ } while(0) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a656cecd373c..f93f22b3d2ff 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -357,6 +357,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, } extern void kfree_skbmem(struct sk_buff *skb); +extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, @@ -1781,6 +1782,11 @@ static inline int skb_is_gso(const struct sk_buff *skb) return skb_shinfo(skb)->gso_size; } +static inline int skb_is_gso_v6(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; +} + static inline void skb_forward_csum(struct sk_buff *skb) { /* Unfortunately we don't support this one. Any brave souls? */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c0d9d14983b3..d9d5c5ad826c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -117,7 +117,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, - struct rpc_program *, int); + struct rpc_program *, u32); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 3912cf16361e..3347c72b848a 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -88,6 +88,11 @@ enum { CTL_SLOTTABLE_TCP, CTL_MIN_RESVPORT, CTL_MAX_RESVPORT, + CTL_SLOTTABLE_RDMA, + CTL_RDMA_MAXINLINEREAD, + CTL_RDMA_MAXINLINEWRITE, + CTL_RDMA_WRITEPADDING, + CTL_RDMA_MEMREG, }; #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 784d4c3ef651..c4beb5775111 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -138,6 +138,19 @@ typedef __be32 rpc_fraghdr; #define RPC_MAX_HEADER_WITH_AUTH \ (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4)) +/* + * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. + */ +#define RPCBIND_NETID_UDP "udp" +#define RPCBIND_NETID_TCP "tcp" +#define RPCBIND_NETID_UDP6 "udp6" +#define RPCBIND_NETID_TCP6 "tcp6" + +/* + * Note that RFC 1833 does not put any size restrictions on the + * netid string, but all currently defined netid's fit in 4 bytes. + */ +#define RPCBIND_MAXNETIDLEN (4u) #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h new file mode 100644 index 000000000000..0013a0d8dc6b --- /dev/null +++ b/include/linux/sunrpc/rpc_rdma.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_RPC_RDMA_H +#define _LINUX_SUNRPC_RPC_RDMA_H + +struct rpcrdma_segment { + uint32_t rs_handle; /* Registered memory handle */ + uint32_t rs_length; /* Length of the chunk in bytes */ + uint64_t rs_offset; /* Chunk virtual address or offset */ +}; + +/* + * read chunk(s), encoded as a linked list. + */ +struct rpcrdma_read_chunk { + uint32_t rc_discrim; /* 1 indicates presence */ + uint32_t rc_position; /* Position in XDR stream */ + struct rpcrdma_segment rc_target; +}; + +/* + * write chunk, and reply chunk. + */ +struct rpcrdma_write_chunk { + struct rpcrdma_segment wc_target; +}; + +/* + * write chunk(s), encoded as a counted array. + */ +struct rpcrdma_write_array { + uint32_t wc_discrim; /* 1 indicates presence */ + uint32_t wc_nchunks; /* Array count */ + struct rpcrdma_write_chunk wc_array[0]; +}; + +struct rpcrdma_msg { + uint32_t rm_xid; /* Mirrors the RPC header xid */ + uint32_t rm_vers; /* Version of this protocol */ + uint32_t rm_credit; /* Buffers requested/granted */ + uint32_t rm_type; /* Type of message (enum rpcrdma_proc) */ + union { + + struct { /* no chunks */ + uint32_t rm_empty[3]; /* 3 empty chunk lists */ + } rm_nochunks; + + struct { /* no chunks and padded */ + uint32_t rm_align; /* Padding alignment */ + uint32_t rm_thresh; /* Padding threshold */ + uint32_t rm_pempty[3]; /* 3 empty chunk lists */ + } rm_padded; + + uint32_t rm_chunks[0]; /* read, write and reply chunks */ + + } rm_body; +}; + +#define RPCRDMA_HDRLEN_MIN 28 + +enum rpcrdma_errcode { + ERR_VERS = 1, + ERR_CHUNK = 2 +}; + +struct rpcrdma_err_vers { + uint32_t rdma_vers_low; /* Version range supported by peer */ + uint32_t rdma_vers_high; +}; + +enum rpcrdma_proc { + RDMA_MSG = 0, /* An RPC call or reply msg */ + RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */ + RDMA_MSGP = 2, /* An RPC call or reply msg with padding */ + RDMA_DONE = 3, /* Client signals reply completion */ + RDMA_ERROR = 4 /* An RPC RDMA encoding error */ +}; + +#endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index c6b53d181bfa..0751c9464d0f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -70,7 +70,10 @@ struct xdr_buf { struct page ** pages; /* Array of contiguous pages */ unsigned int page_base, /* Start of page data */ - page_len; /* Length of page data */ + page_len, /* Length of page data */ + flags; /* Flags for data disposition */ +#define XDRBUF_READ 0x01 /* target of file read */ +#define XDRBUF_WRITE 0x02 /* source of file write */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length of XDR encoded message */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d11cedd14f0f..30b17b3bc1a9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -19,25 +19,11 @@ #ifdef __KERNEL__ -extern unsigned int xprt_udp_slot_table_entries; -extern unsigned int xprt_tcp_slot_table_entries; - #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) /* - * Parameters for choosing a free port - */ -extern unsigned int xprt_min_resvport; -extern unsigned int xprt_max_resvport; - -#define RPC_MIN_RESVPORT (1U) -#define RPC_MAX_RESVPORT (65535U) -#define RPC_DEF_MIN_RESVPORT (665U) -#define RPC_DEF_MAX_RESVPORT (1023U) - -/* * This describes a timeout strategy */ struct rpc_timeout { @@ -53,6 +39,10 @@ enum rpc_display_format_t { RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, RPC_DISPLAY_ALL, + RPC_DISPLAY_HEX_ADDR, + RPC_DISPLAY_HEX_PORT, + RPC_DISPLAY_UNIVERSAL_ADDR, + RPC_DISPLAY_NETID, RPC_DISPLAY_MAX, }; @@ -196,14 +186,22 @@ struct rpc_xprt { char * address_strings[RPC_DISPLAY_MAX]; }; -struct rpc_xprtsock_create { - int proto; /* IPPROTO_UDP or IPPROTO_TCP */ +struct xprt_create { + int ident; /* XPRT_TRANSPORT identifier */ struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; struct rpc_timeout * timeout; /* optional timeout parameters */ }; +struct xprt_class { + struct list_head list; + int ident; /* XPRT_TRANSPORT identifier */ + struct rpc_xprt * (*setup)(struct xprt_create *); + struct module *owner; + char name[32]; +}; + /* * Transport operations used by ULPs */ @@ -212,7 +210,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ -struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args); +struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); @@ -235,6 +233,8 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * /* * Transport switch helper functions */ +int xprt_register_transport(struct xprt_class *type); +int xprt_unregister_transport(struct xprt_class *type); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); @@ -248,14 +248,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect(struct rpc_xprt *xprt); /* - * Socket transport setup operations - */ -struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args); -struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args); -int init_socket_xprt(void); -void cleanup_socket_xprt(void); - -/* * Reserved bit positions in xprt->state */ #define XPRT_LOCKED (0) diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h new file mode 100644 index 000000000000..4de56b1d372b --- /dev/null +++ b/include/linux/sunrpc/xprtrdma.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_XPRTRDMA_H +#define _LINUX_SUNRPC_XPRTRDMA_H + +/* + * RPC transport identifier for RDMA + */ +#define XPRT_TRANSPORT_RDMA 256 + +/* + * rpcbind (v3+) RDMA netid. + */ +#define RPCBIND_NETID_RDMA "rdma" + +/* + * Constants. Max RPC/NFS header is big enough to account for + * additional marshaling buffers passed down by Linux client. + * + * RDMA header is currently fixed max size, and is big enough for a + * fully-chunked NFS message (read chunks are the largest). Note only + * a single chunk type per message is supported currently. + */ +#define RPCRDMA_MIN_SLOT_TABLE (2U) +#define RPCRDMA_DEF_SLOT_TABLE (32U) +#define RPCRDMA_MAX_SLOT_TABLE (256U) + +#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ + +#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ + +#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ +#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ + +/* memory registration strategies */ +#define RPCRDMA_PERSISTENT_REGISTRATION (1) + +enum rpcrdma_memreg { + RPCRDMA_BOUNCEBUFFERS = 0, + RPCRDMA_REGISTER, + RPCRDMA_MEMWINDOWS, + RPCRDMA_MEMWINDOWS_ASYNC, + RPCRDMA_MTHCAFMR, + RPCRDMA_ALLPHYSICAL, + RPCRDMA_LAST +}; + +#endif /* _LINUX_SUNRPC_XPRTRDMA_H */ diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h new file mode 100644 index 000000000000..2c6c2c2783d8 --- /dev/null +++ b/include/linux/sunrpc/xprtsock.h @@ -0,0 +1,51 @@ +/* + * linux/include/linux/sunrpc/xprtsock.h + * + * Declarations for the RPC transport socket provider. + */ + +#ifndef _LINUX_SUNRPC_XPRTSOCK_H +#define _LINUX_SUNRPC_XPRTSOCK_H + +#ifdef __KERNEL__ + +/* + * Socket transport setup operations + */ +struct rpc_xprt *xs_setup_udp(struct xprt_create *args); +struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); + +int init_socket_xprt(void); +void cleanup_socket_xprt(void); + +/* + * RPC transport identifiers for UDP, TCP + * + * To preserve compatibility with the historical use of raw IP protocol + * id's for transport selection, these are specified with the previous + * values. No such restriction exists for new transports, except that + * they may not collide with these values (17 and 6, respectively). + */ +#define XPRT_TRANSPORT_UDP IPPROTO_UDP +#define XPRT_TRANSPORT_TCP IPPROTO_TCP + +/* + * RPC slot table sizes for UDP, TCP transports + */ +extern unsigned int xprt_udp_slot_table_entries; +extern unsigned int xprt_tcp_slot_table_entries; + +/* + * Parameters for choosing a free port + */ +extern unsigned int xprt_min_resvport; +extern unsigned int xprt_max_resvport; + +#define RPC_MIN_RESVPORT (1U) +#define RPC_MAX_RESVPORT (65535U) +#define RPC_DEF_MIN_RESVPORT (665U) +#define RPC_DEF_MAX_RESVPORT (1023U) + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c5b94c1a5ee2..bac17c59b24e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -315,7 +315,7 @@ struct tcp_sock { */ u32 snd_ssthresh; /* Slow start size threshold */ u32 snd_cwnd; /* Sending congestion window */ - u16 snd_cwnd_cnt; /* Linear increase counter */ + u32 snd_cwnd_cnt; /* Linear increase counter */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c7c3337c3a88..d1321a81c9c4 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -62,8 +62,6 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ - - void *fs_private; /* For use by ->writepages() */ }; /* diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h new file mode 100644 index 000000000000..911c2cd02941 --- /dev/null +++ b/include/net/inet_frag.h @@ -0,0 +1,60 @@ +#ifndef __NET_FRAG_H__ +#define __NET_FRAG_H__ + +struct inet_frag_queue { + struct hlist_node list; + struct list_head lru_list; /* lru list member */ + spinlock_t lock; + atomic_t refcnt; + struct timer_list timer; /* when will this queue expire? */ + struct sk_buff *fragments; /* list of received fragments */ + ktime_t stamp; + int len; /* total length of orig datagram */ + int meat; + __u8 last_in; /* first/last segment arrived? */ + +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 +}; + +#define INETFRAGS_HASHSZ 64 + +struct inet_frags_ctl { + int high_thresh; + int low_thresh; + int timeout; + int secret_interval; +}; + +struct inet_frags { + struct list_head lru_list; + struct hlist_head hash[INETFRAGS_HASHSZ]; + rwlock_t lock; + u32 rnd; + int nqueues; + int qsize; + atomic_t mem; + struct timer_list secret_timer; + struct inet_frags_ctl *ctl; + + unsigned int (*hashfn)(struct inet_frag_queue *); + void (*destructor)(struct inet_frag_queue *); + void (*skb_free)(struct sk_buff *); +}; + +void inet_frags_init(struct inet_frags *); +void inet_frags_fini(struct inet_frags *); + +void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); +void inet_frag_destroy(struct inet_frag_queue *q, + struct inet_frags *f, int *work); +int inet_frag_evictor(struct inet_frags *f); + +static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) +{ + if (atomic_dec_and_test(&q->refcnt)) + inet_frag_destroy(q, f, NULL); +} + +#endif diff --git a/include/net/ip.h b/include/net/ip.h index 3af3ed9d320b..840dd91b513b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -160,6 +160,7 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics); #define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field) #define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field) #define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field) +#define IP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ip_statistics, field, val) DECLARE_SNMP_STAT(struct linux_mib, net_statistics); #define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field) #define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field) @@ -177,10 +178,8 @@ extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; /* From ip_fragment.c */ -extern int sysctl_ipfrag_high_thresh; -extern int sysctl_ipfrag_low_thresh; -extern int sysctl_ipfrag_time; -extern int sysctl_ipfrag_secret_interval; +struct inet_frags_ctl; +extern struct inet_frags_ctl ip4_frags_ctl; extern int sysctl_ipfrag_max_dist; /* From inetpeer.c */ @@ -332,9 +331,9 @@ enum ip_defrag_users IP_DEFRAG_VS_FWD }; -struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user); -extern int ip_frag_nqueues; -extern atomic_t ip_frag_mem; +int ip_defrag(struct sk_buff *skb, u32 user); +int ip_frag_mem(void); +int ip_frag_nqueues(void); /* * Functions provided by ip_forward.c diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 672564e5a81d..41870564df8e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -464,10 +464,10 @@ struct ip_vs_protocol { unsigned int proto_off, int inverse); - int (*snat_handler)(struct sk_buff **pskb, + int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp); - int (*dnat_handler)(struct sk_buff **pskb, + int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp); int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp); @@ -654,11 +654,11 @@ struct ip_vs_app /* output hook: return false if can't linearize. diff set for TCP. */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, - struct sk_buff **, int *diff); + struct sk_buff *, int *diff); /* input hook: return false if can't linearize. diff set for TCP. */ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, - struct sk_buff **, int *diff); + struct sk_buff *, int *diff); /* ip_vs_app initializer */ int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *); @@ -832,8 +832,8 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); extern int ip_vs_app_inc_get(struct ip_vs_app *inc); extern void ip_vs_app_inc_put(struct ip_vs_app *inc); -extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb); -extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb); +extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); +extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len); extern int ip_vs_app_init(void); @@ -984,7 +984,6 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) return fwd; } -extern int ip_vs_make_skb_writable(struct sk_buff **pskb, int len); extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int dir); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 31b3f1b45a2b..cc796cbc1b26 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -120,12 +120,21 @@ extern int sysctl_mld_max_msf; SNMP_INC_STATS##modifier(statname##_statistics, (field)); \ }) +#define _DEVADD(statname, modifier, idev, field, val) \ +({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ + SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\ +}) + /* MIBs */ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_INC_STATS(idev,field) _DEVINC(ipv6, , idev, field) #define IP6_INC_STATS_BH(idev,field) _DEVINC(ipv6, _BH, idev, field) #define IP6_INC_STATS_USER(idev,field) _DEVINC(ipv6, _USER, idev, field) +#define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); @@ -240,7 +249,7 @@ extern int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)); -extern int ipv6_parse_hopopts(struct sk_buff **skbp); +extern int ipv6_parse_hopopts(struct sk_buff *skb); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, @@ -252,8 +261,8 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); -extern int ip6_frag_nqueues; -extern atomic_t ip6_frag_mem; +int ip6_frag_nqueues(void); +int ip6_frag_mem(void); #define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ @@ -565,10 +574,8 @@ extern int inet6_hash_connect(struct inet_timewait_death_row *death_row, /* * reassembly.c */ -extern int sysctl_ip6frag_high_thresh; -extern int sysctl_ip6frag_low_thresh; -extern int sysctl_ip6frag_time; -extern int sysctl_ip6frag_secret_interval; +struct inet_frags_ctl; +extern struct inet_frags_ctl ip6_frags_ctl; extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 070d12cb4634..f703533fb4db 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -15,8 +15,7 @@ extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, struct net_device *out, int (*okfn)(struct sk_buff *)); -extern unsigned int nf_ct_frag6_timeout; -extern unsigned int nf_ct_frag6_low_thresh; -extern unsigned int nf_ct_frag6_high_thresh; +struct inet_frags_ctl; +extern struct inet_frags_ctl nf_frags_ctl; #endif /* _NF_CONNTRACK_IPV6_H*/ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 4056f5f08da1..a532e7b5ed6a 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -22,7 +22,7 @@ of connection tracking. */ extern unsigned int nf_conntrack_in(int pf, unsigned int hooknum, - struct sk_buff **pskb); + struct sk_buff *skb); extern int nf_conntrack_init(void); extern void nf_conntrack_cleanup(void); @@ -60,17 +60,17 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, extern struct nf_conntrack_tuple_hash * nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple); -extern int __nf_conntrack_confirm(struct sk_buff **pskb); +extern int __nf_conntrack_confirm(struct sk_buff *skb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ -static inline int nf_conntrack_confirm(struct sk_buff **pskb) +static inline int nf_conntrack_confirm(struct sk_buff *skb) { - struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct; + struct nf_conn *ct = (struct nf_conn *)skb->nfct; int ret = NF_ACCEPT; if (ct) { if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) - ret = __nf_conntrack_confirm(pskb); + ret = __nf_conntrack_confirm(skb); nf_ct_deliver_cached_events(ct); } return ret; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 0dcc4c828ce9..d7b2d5483a71 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -29,7 +29,7 @@ struct nf_conntrack_helper /* Function to call when data passes; return verdict, or -1 to invalidate. */ - int (*help)(struct sk_buff **pskb, + int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info conntrackinfo); diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index c3cd127ba4bb..f29eeb9777e0 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -10,12 +10,12 @@ extern unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb); + struct sk_buff *skb); extern int nf_nat_icmp_reply_translation(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb); + struct sk_buff *skb); static inline int nf_nat_initialized(struct nf_conn *ct, enum nf_nat_manip_type manip) diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index ec98ecf95fc8..58dd22687949 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -7,21 +7,21 @@ struct sk_buff; /* These return true or false. */ -extern int nf_nat_mangle_tcp_packet(struct sk_buff **skb, +extern int nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, unsigned int rep_len); -extern int nf_nat_mangle_udp_packet(struct sk_buff **skb, +extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, unsigned int rep_len); -extern int nf_nat_seq_adjust(struct sk_buff **pskb, +extern int nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo); diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h index 14c7b2d7263c..04578bfe23e1 100644 --- a/include/net/netfilter/nf_nat_protocol.h +++ b/include/net/netfilter/nf_nat_protocol.h @@ -18,7 +18,7 @@ struct nf_nat_protocol /* Translate a packet to the target according to manip type. Return true if succeeded. */ - int (*manip_pkt)(struct sk_buff **pskb, + int (*manip_pkt)(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype); diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h index f9743187d57f..75d1825031d7 100644 --- a/include/net/netfilter/nf_nat_rule.h +++ b/include/net/netfilter/nf_nat_rule.h @@ -6,7 +6,7 @@ extern int nf_nat_rule_init(void) __init; extern void nf_nat_rule_cleanup(void); -extern int nf_nat_rule_find(struct sk_buff **pskb, +extern int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, diff --git a/include/net/protocol.h b/include/net/protocol.h index 105bf12b0c79..1166ffb4b3ec 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -45,7 +45,7 @@ struct net_protocol { #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) struct inet6_protocol { - int (*handler)(struct sk_buff **skb); + int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77be396ca633..0e844845f3f4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1051,7 +1051,7 @@ extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi); -extern int xfrm6_rcv(struct sk_buff **pskb); +extern int xfrm6_rcv(struct sk_buff *skb); extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 04f3ffb8d9d4..0ae703c157ba 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1525,6 +1525,7 @@ add_names: context->names[idx].ino = (unsigned long)-1; } } +EXPORT_SYMBOL_GPL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 734da579ad13..a6f1ee9c92d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1521,7 +1521,7 @@ cache_hit: } static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, - struct held_lock *hlock, int chain_head) + struct held_lock *hlock, int chain_head, u64 chain_key) { /* * Trylock needs to maintain the stack of held locks, but it @@ -1534,7 +1534,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, * graph_lock for us) */ if (!hlock->trylock && (hlock->check == 2) && - lookup_chain_cache(curr->curr_chain_key, hlock->class)) { + lookup_chain_cache(chain_key, hlock->class)) { /* * Check whether last held lock: * @@ -1576,7 +1576,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, #else static inline int validate_chain(struct task_struct *curr, struct lockdep_map *lock, struct held_lock *hlock, - int chain_head) + int chain_head, u64 chain_key) { return 1; } @@ -2450,11 +2450,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, chain_head = 1; } chain_key = iterate_chain_key(chain_key, id); - curr->curr_chain_key = chain_key; - if (!validate_chain(curr, lock, hlock, chain_head)) + if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) return 0; + curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); #ifdef CONFIG_DEBUG_LOCKDEP @@ -3199,3 +3199,19 @@ void debug_show_held_locks(struct task_struct *task) } EXPORT_SYMBOL_GPL(debug_show_held_locks); + +void lockdep_sys_exit(void) +{ + struct task_struct *curr = current; + + if (unlikely(curr->lockdep_depth)) { + if (!debug_locks_off()) + return; + printk("\n================================================\n"); + printk( "[ BUG: lock held when returning to user space! ]\n"); + printk( "------------------------------------------------\n"); + printk("%s/%d is leaving the kernel with locks still held!\n", + curr->comm, curr->pid); + lockdep_print_held_locks(curr); + } +} diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index c851b2dcc685..8a135bd163c2 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -25,28 +25,38 @@ static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - struct lock_class *class = v; + struct lock_class *class; (*pos)++; - if (class->lock_entry.next != &all_lock_classes) - class = list_entry(class->lock_entry.next, struct lock_class, - lock_entry); - else - class = NULL; - m->private = class; + if (v == SEQ_START_TOKEN) + class = m->private; + else { + class = v; + + if (class->lock_entry.next != &all_lock_classes) + class = list_entry(class->lock_entry.next, + struct lock_class, lock_entry); + else + class = NULL; + } return class; } static void *l_start(struct seq_file *m, loff_t *pos) { - struct lock_class *class = m->private; + struct lock_class *class; + loff_t i = 0; - if (&class->lock_entry == all_lock_classes.next) - seq_printf(m, "all lock classes:\n"); + if (*pos == 0) + return SEQ_START_TOKEN; - return class; + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (++i == *pos) + return class; + } + return NULL; } static void l_stop(struct seq_file *m, void *v) @@ -101,10 +111,15 @@ static void print_name(struct seq_file *m, struct lock_class *class) static int l_show(struct seq_file *m, void *v) { unsigned long nr_forward_deps, nr_backward_deps; - struct lock_class *class = m->private; + struct lock_class *class = v; struct lock_list *entry; char c1, c2, c3, c4; + if (v == SEQ_START_TOKEN) { + seq_printf(m, "all lock classes:\n"); + return 0; + } + seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP seq_printf(m, " OPS:%8ld", class->ops); @@ -523,10 +538,11 @@ static void *ls_start(struct seq_file *m, loff_t *pos) { struct lock_stat_seq *data = m->private; - if (data->iter == data->stats) - seq_header(m); + if (*pos == 0) + return SEQ_START_TOKEN; - if (data->iter == data->iter_end) + data->iter = data->stats + *pos; + if (data->iter >= data->iter_end) data->iter = NULL; return data->iter; @@ -538,8 +554,13 @@ static void *ls_next(struct seq_file *m, void *v, loff_t *pos) (*pos)++; - data->iter = v; - data->iter++; + if (v == SEQ_START_TOKEN) + data->iter = data->stats; + else { + data->iter = v; + data->iter++; + } + if (data->iter == data->iter_end) data->iter = NULL; @@ -552,9 +573,11 @@ static void ls_stop(struct seq_file *m, void *v) static int ls_show(struct seq_file *m, void *v) { - struct lock_stat_seq *data = m->private; + if (v == SEQ_START_TOKEN) + seq_header(m); + else + seq_stats(m, v); - seq_stats(m, data->iter); return 0; } diff --git a/kernel/mutex.c b/kernel/mutex.c index 691b86564dd9..d7fe50cc556f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -51,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) EXPORT_SYMBOL(__mutex_init); +#ifndef CONFIG_DEBUG_LOCK_ALLOC /* * We split the mutex lock/unlock logic into separate fastpath and * slowpath functions, to reduce the register pressure on the fastpath. @@ -92,6 +93,7 @@ void inline fastcall __sched mutex_lock(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock); +#endif static void fastcall noinline __sched __mutex_unlock_slowpath(atomic_t *lock_count); @@ -122,7 +124,8 @@ EXPORT_SYMBOL(mutex_unlock); * Lock a mutex (possibly interruptible), slowpath: */ static inline int __sched -__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) +__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + unsigned long ip) { struct task_struct *task = current; struct mutex_waiter waiter; @@ -132,7 +135,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) spin_lock_mutex(&lock->wait_lock, flags); debug_mutex_lock_common(lock, &waiter); - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + mutex_acquire(&lock->dep_map, subclass, 0, ip); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); /* add waiting tasks to the end of the waitqueue (FIFO): */ @@ -143,7 +146,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) if (old_val == 1) goto done; - lock_contended(&lock->dep_map, _RET_IP_); + lock_contended(&lock->dep_map, ip); for (;;) { /* @@ -166,7 +169,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) if (unlikely(state == TASK_INTERRUPTIBLE && signal_pending(task))) { mutex_remove_waiter(lock, &waiter, task_thread_info(task)); - mutex_release(&lock->dep_map, 1, _RET_IP_); + mutex_release(&lock->dep_map, 1, ip); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); @@ -197,20 +200,12 @@ done: return 0; } -static void fastcall noinline __sched -__mutex_lock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); -} - #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -219,7 +214,7 @@ int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -271,6 +266,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count) __mutex_unlock_common_slowpath(lock_count, 1); } +#ifndef CONFIG_DEBUG_LOCK_ALLOC /* * Here come the less common (and hence less performance-critical) APIs: * mutex_lock_interruptible() and mutex_trylock(). @@ -298,13 +294,22 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock) EXPORT_SYMBOL(mutex_lock_interruptible); +static void fastcall noinline __sched +__mutex_lock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); +} + static int fastcall noinline __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); } +#endif /* * Spinlock based trylock, we take the spinlock and check whether we diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2c2dd8410dc4..130214f3d229 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -49,6 +49,14 @@ #include <linux/cpu.h> #include <linux/mutex.h> +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key rcu_lock_key; +struct lockdep_map rcu_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); + +EXPORT_SYMBOL_GPL(rcu_lock_map); +#endif + /* Definition for rcupdate control block. */ static struct rcu_ctrlblk rcu_ctrlblk = { .cur = -300, diff --git a/net/bridge/br.c b/net/bridge/br.c index 848b8fa8bedd..93867bb6cc97 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -23,7 +23,7 @@ #include "br_private.h" -int (*br_should_route_hook) (struct sk_buff **pskb) = NULL; +int (*br_should_route_hook)(struct sk_buff *skb); static struct llc_sap *br_stp_sap; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 3a8a015c92e0..3cedd4eeeed6 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -126,6 +126,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NULL; + if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) @@ -145,7 +149,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) case BR_STATE_FORWARDING: if (br_should_route_hook) { - if (br_should_route_hook(&skb)) + if (br_should_route_hook(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 8245f051ccbb..da22f900e89d 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -503,18 +503,14 @@ inhdr_error: * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct iphdr *iph; - struct sk_buff *skb = *pskb; __u32 len = nf_bridge_encap_header_len(skb); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - return NF_STOLEN; - if (unlikely(!pskb_may_pull(skb, len))) goto out; @@ -584,13 +580,11 @@ out: * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - if (skb->dst == (struct dst_entry *)&__fake_rtable) { dst_release(skb->dst); skb->dst = NULL; @@ -625,12 +619,11 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; struct net_device *parent; int pf; @@ -648,7 +641,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; - nf_bridge_pull_encap_header(*pskb); + nf_bridge_pull_encap_header(skb); nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { @@ -666,12 +659,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct net_device **d = (struct net_device **)(skb->cb); #ifdef CONFIG_SYSCTL @@ -682,12 +674,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) return NF_ACCEPT; - nf_bridge_pull_encap_header(*pskb); + nf_bridge_pull_encap_header(skb); } if (arp_hdr(skb)->ar_pln != 4) { if (IS_VLAN_ARP(skb)) - nf_bridge_push_encap_header(*pskb); + nf_bridge_push_encap_header(skb); return NF_ACCEPT; } *d = (struct net_device *)in; @@ -709,13 +701,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. */ -static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct net_device *realindev; - struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; if (!skb->nf_bridge) @@ -752,13 +743,12 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge; + struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); int pf; @@ -828,13 +818,13 @@ print_error: /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, +static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if ((*pskb)->nf_bridge && - !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + if (skb->nf_bridge && + !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { return NF_STOP; } diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index ffe468a632e7..48a80e423287 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -15,7 +15,7 @@ #include <net/arp.h> #include <linux/module.h> -static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -23,7 +23,6 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, __be32 _sip, *siptr, _dip, *diptr; struct arphdr _ah, *ap; unsigned char _sha[ETH_ALEN], *shp; - struct sk_buff *skb = *pskb; ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 4582659dff0e..74262e9a566a 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -8,29 +8,22 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> -static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *)data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN); return info->target; } diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 62d23c7b25e6..6cba54309c09 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -17,7 +17,7 @@ #include <linux/netfilter_bridge/ebt_mark_t.h> #include <linux/module.h> -static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -25,13 +25,13 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, int action = info->target & -16; if (action == MARK_SET_VALUE) - (*pskb)->mark = info->mark; + skb->mark = info->mark; else if (action == MARK_OR_VALUE) - (*pskb)->mark |= info->mark; + skb->mark |= info->mark; else if (action == MARK_AND_VALUE) - (*pskb)->mark &= info->mark; + skb->mark &= info->mark; else - (*pskb)->mark ^= info->mark; + skb->mark ^= info->mark; return info->target | ~EBT_VERDICT_BITS; } diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 9f378eab72d0..422cb834cff9 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -8,35 +8,28 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_redirect.h> #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" -static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } if (hooknr != NF_BR_BROUTING) - memcpy(eth_hdr(*pskb)->h_dest, + memcpy(eth_hdr(skb)->h_dest, in->br_port->br->dev->dev_addr, ETH_ALEN); else - memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN); - (*pskb)->pkt_type = PACKET_HOST; + memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN); + skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index a50722182bfe..425ac920904d 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -8,6 +8,7 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> @@ -15,34 +16,26 @@ #include <linux/if_arp.h> #include <net/arp.h> -static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *) data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN); + memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN); if (!(info->target & NAT_ARP_BIT) && - eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) { + eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { struct arphdr _ah, *ap; - ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah); + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) return EBT_DROP; if (ap->ar_hln != ETH_ALEN) goto out; - if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN)) + if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN)) return EBT_DROP; } out: diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d37ce0478938..e44519ebf1d2 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -51,11 +51,11 @@ static struct ebt_table broute_table = .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff **pskb) +static int ebt_broute(struct sk_buff *skb) { int ret; - ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL, + ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, &broute_table); if (ret == NF_DROP) return 1; /* route it */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 81d84145c417..210493f99bc4 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -61,10 +61,10 @@ static struct ebt_table frame_filter = }; static unsigned int -ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, +ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_filter); + return ebt_do_table(hook, skb, in, out, &frame_filter); } static struct nf_hook_ops ebt_ops_filter[] = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9c50488b62eb..3e58c2e5ee21 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -61,17 +61,17 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in +ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, &frame_nat); } static unsigned int -ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in +ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, &frame_nat); } static struct nf_hook_ops ebt_ops_nat[] = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 6018d0e51938..d5a09eaef915 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -142,7 +142,7 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, +unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table) { @@ -172,19 +172,19 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, eth_hdr(*pskb), in, out)) + if (ebt_basic_match(point, eth_hdr(skb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0) goto letscontinue; /* increase counter */ (*(counter_base + i)).pcnt++; - (*(counter_base + i)).bcnt+=(**pskb).len; + (*(counter_base + i)).bcnt += skb->len; /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in, + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in, out); t = (struct ebt_entry_target *) @@ -193,7 +193,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; else - verdict = t->u.target->target(pskb, hook, + verdict = t->u.target->target(skb, hook, in, out, t->data, t->target_size); if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); diff --git a/net/core/dev.c b/net/core/dev.c index 99b7bda37d10..38b03da5c1ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1362,22 +1362,21 @@ int skb_checksum_help(struct sk_buff *skb) goto out_set_summed; } - if (skb_cloned(skb)) { + offset = skb->csum_start - skb_headroom(skb); + BUG_ON(offset >= skb_headlen(skb)); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + + offset += skb->csum_offset; + BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); + + if (skb_cloned(skb) && + !skb_clone_writable(skb, offset + sizeof(__sum16))) { ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (ret) goto out; } - offset = skb->csum_start - skb_headroom(skb); - BUG_ON(offset > (int)skb->len); - csum = skb_checksum(skb, offset, skb->len-offset, 0); - - offset = skb_headlen(skb) - offset; - BUG_ON(offset <= 0); - BUG_ON(skb->csum_offset + 2 > offset); - - *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = - csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: @@ -1949,27 +1948,51 @@ static int ing_filter(struct sk_buff *skb) struct Qdisc *q; struct net_device *dev = skb->dev; int result = TC_ACT_OK; + u32 ttl = G_TC_RTTL(skb->tc_verd); - if (dev->qdisc_ingress) { - __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, skb->dev->ifindex); - return TC_ACT_SHOT; - } + if (MAX_RED_LOOP < ttl++) { + printk(KERN_WARNING + "Redir loop detected Dropping packet (%d->%d)\n", + skb->iif, dev->ifindex); + return TC_ACT_SHOT; + } + + skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); + spin_lock(&dev->ingress_lock); + if ((q = dev->qdisc_ingress) != NULL) + result = q->enqueue(skb, q); + spin_unlock(&dev->ingress_lock); - skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); + return result; +} + +static inline struct sk_buff *handle_ing(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + if (!skb->dev->qdisc_ingress) + goto out; - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } else { + /* Huh? Why does turning on AF_PACKET affect this? */ + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + } + switch (ing_filter(skb)) { + case TC_ACT_SHOT: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NULL; } - return result; +out: + skb->tc_verd = 0; + return skb; } #endif @@ -2021,21 +2044,9 @@ int netif_receive_skb(struct sk_buff *skb) } #ifdef CONFIG_NET_CLS_ACT - if (pt_prev) { - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = NULL; /* noone else should process this after*/ - } else { - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - } - - ret = ing_filter(skb); - - if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { - kfree_skb(skb); + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) goto out; - } - - skb->tc_verd = 0; ncls: #endif diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c52df858d0be..cd3af59b38a1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -481,6 +481,8 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, if (!creat) goto out; + ASSERT_RTNL(); + n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 944189d96323..70d9b5da96ae 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -362,6 +362,97 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) +{ + new->tstamp = old->tstamp; + new->dev = old->dev; + new->transport_header = old->transport_header; + new->network_header = old->network_header; + new->mac_header = old->mac_header; + new->dst = dst_clone(old->dst); +#ifdef CONFIG_INET + new->sp = secpath_get(old->sp); +#endif + memcpy(new->cb, old->cb, sizeof(old->cb)); + new->csum_start = old->csum_start; + new->csum_offset = old->csum_offset; + new->local_df = old->local_df; + new->pkt_type = old->pkt_type; + new->ip_summed = old->ip_summed; + skb_copy_queue_mapping(new, old); + new->priority = old->priority; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + new->ipvs_property = old->ipvs_property; +#endif + new->protocol = old->protocol; + new->mark = old->mark; + __nf_copy(new, old); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + new->nf_trace = old->nf_trace; +#endif +#ifdef CONFIG_NET_SCHED + new->tc_index = old->tc_index; +#ifdef CONFIG_NET_CLS_ACT + new->tc_verd = old->tc_verd; +#endif +#endif + skb_copy_secmark(new, old); +} + +static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) +{ +#define C(x) n->x = skb->x + + n->next = n->prev = NULL; + n->sk = NULL; + __copy_skb_header(n, skb); + + C(len); + C(data_len); + C(mac_len); + n->cloned = 1; + n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; + n->nohdr = 0; + n->destructor = NULL; +#ifdef CONFIG_NET_CLS_ACT + /* FIXME What is this and why don't we do it in copy_skb_header? */ + n->tc_verd = SET_TC_VERD(n->tc_verd,0); + n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); + n->tc_verd = CLR_TC_MUNGED(n->tc_verd); + C(iif); +#endif + C(truesize); + atomic_set(&n->users, 1); + C(head); + C(data); + C(tail); + C(end); + + atomic_inc(&(skb_shinfo(skb)->dataref)); + skb->cloned = 1; + + return n; +#undef C +} + +/** + * skb_morph - morph one skb into another + * @dst: the skb to receive the contents + * @src: the skb to supply the contents + * + * This is identical to skb_clone except that the target skb is + * supplied by the user. + * + * The target skb is returned upon exit. + */ +struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) +{ + skb_release_data(dst); + return __skb_clone(dst, src); +} +EXPORT_SYMBOL_GPL(skb_morph); + /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone @@ -393,66 +484,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_UNAVAILABLE; } -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->sk = NULL; - C(tstamp); - C(dev); - C(transport_header); - C(network_header); - C(mac_header); - C(dst); - dst_clone(skb->dst); - C(sp); -#ifdef CONFIG_INET - secpath_get(skb->sp); -#endif - memcpy(n->cb, skb->cb, sizeof(skb->cb)); - C(len); - C(data_len); - C(mac_len); - C(csum); - C(local_df); - n->cloned = 1; - n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; - n->nohdr = 0; - C(pkt_type); - C(ip_summed); - skb_copy_queue_mapping(n, skb); - C(priority); -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - C(ipvs_property); -#endif - C(protocol); - n->destructor = NULL; - C(mark); - __nf_copy(n, skb); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - C(nf_trace); -#endif -#ifdef CONFIG_NET_SCHED - C(tc_index); -#ifdef CONFIG_NET_CLS_ACT - n->tc_verd = SET_TC_VERD(skb->tc_verd,0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - C(iif); -#endif -#endif - skb_copy_secmark(n, skb); - C(truesize); - atomic_set(&n->users, 1); - C(head); - C(data); - C(tail); - C(end); - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; - - return n; + return __skb_clone(n, skb); } static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) @@ -463,50 +495,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) */ unsigned long offset = new->data - old->data; #endif - new->sk = NULL; - new->dev = old->dev; - skb_copy_queue_mapping(new, old); - new->priority = old->priority; - new->protocol = old->protocol; - new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET - new->sp = secpath_get(old->sp); -#endif - new->csum_start = old->csum_start; - new->csum_offset = old->csum_offset; - new->ip_summed = old->ip_summed; - new->transport_header = old->transport_header; - new->network_header = old->network_header; - new->mac_header = old->mac_header; + + __copy_skb_header(new, old); + #ifndef NET_SKBUFF_DATA_USES_OFFSET /* {transport,network,mac}_header are relative to skb->head */ new->transport_header += offset; new->network_header += offset; new->mac_header += offset; #endif - memcpy(new->cb, old->cb, sizeof(old->cb)); - new->local_df = old->local_df; - new->fclone = SKB_FCLONE_UNAVAILABLE; - new->pkt_type = old->pkt_type; - new->tstamp = old->tstamp; - new->destructor = NULL; - new->mark = old->mark; - __nf_copy(new, old); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - new->nf_trace = old->nf_trace; -#endif -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - new->ipvs_property = old->ipvs_property; -#endif -#ifdef CONFIG_NET_SCHED -#ifdef CONFIG_NET_CLS_ACT - new->tc_verd = old->tc_verd; -#endif - new->tc_index = old->tc_index; -#endif - skb_copy_secmark(new, old); - atomic_set(&new->users, 1); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -685,7 +682,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->transport_header += off; skb->network_header += off; skb->mac_header += off; - skb->csum_start += off; + skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 006a3834fbcd..cac53548c2d8 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -767,10 +767,9 @@ discard: return 0; } -static int dccp_v6_rcv(struct sk_buff **pskb) +static int dccp_v6_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; - struct sk_buff *skb = *pskb; struct sock *sk; int min_cov; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index f7fba7721e63..43fcd29046d1 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -88,12 +88,12 @@ static void dnrmg_send_peer(struct sk_buff *skb) static unsigned int dnrmg_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - dnrmg_send_peer(*pskb); + dnrmg_send_peer(skb); return NF_ACCEPT; } diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a02c36d0a13e..93fe3966805d 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,7 +10,8 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \ + inet_fragment.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c new file mode 100644 index 000000000000..484cf512858f --- /dev/null +++ b/net/ipv4/inet_fragment.c @@ -0,0 +1,174 @@ +/* + * inet fragments management + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Pavel Emelyanov <xemul@openvz.org> + * Started as consolidation of ipv4/ip_fragment.c, + * ipv6/reassembly. and ipv6 nf conntrack reassembly + */ + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/random.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> + +#include <net/inet_frag.h> + +static void inet_frag_secret_rebuild(unsigned long dummy) +{ + struct inet_frags *f = (struct inet_frags *)dummy; + unsigned long now = jiffies; + int i; + + write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_queue *q; + struct hlist_node *p, *n; + + hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { + unsigned int hval = f->hashfn(q); + + if (hval != i) { + hlist_del(&q->list); + + /* Relink to new hash chain. */ + hlist_add_head(&q->list, &f->hash[hval]); + } + } + } + write_unlock(&f->lock); + + mod_timer(&f->secret_timer, now + f->ctl->secret_interval); +} + +void inet_frags_init(struct inet_frags *f) +{ + int i; + + for (i = 0; i < INETFRAGS_HASHSZ; i++) + INIT_HLIST_HEAD(&f->hash[i]); + + INIT_LIST_HEAD(&f->lru_list); + rwlock_init(&f->lock); + + f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + f->nqueues = 0; + atomic_set(&f->mem, 0); + + init_timer(&f->secret_timer); + f->secret_timer.function = inet_frag_secret_rebuild; + f->secret_timer.data = (unsigned long)f; + f->secret_timer.expires = jiffies + f->ctl->secret_interval; + add_timer(&f->secret_timer); +} +EXPORT_SYMBOL(inet_frags_init); + +void inet_frags_fini(struct inet_frags *f) +{ + del_timer(&f->secret_timer); +} +EXPORT_SYMBOL(inet_frags_fini); + +static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +{ + write_lock(&f->lock); + hlist_del(&fq->list); + list_del(&fq->lru_list); + f->nqueues--; + write_unlock(&f->lock); +} + +void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) +{ + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq, f); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + } +} + +EXPORT_SYMBOL(inet_frag_kill); + +static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb, + int *work) +{ + if (work) + *work -= skb->truesize; + + atomic_sub(skb->truesize, &f->mem); + if (f->skb_free) + f->skb_free(skb); + kfree_skb(skb); +} + +void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, + int *work) +{ + struct sk_buff *fp; + + BUG_TRAP(q->last_in & COMPLETE); + BUG_TRAP(del_timer(&q->timer) == 0); + + /* Release all fragment data. */ + fp = q->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(f, fp, work); + fp = xp; + } + + if (work) + *work -= f->qsize; + atomic_sub(f->qsize, &f->mem); + + f->destructor(q); + +} +EXPORT_SYMBOL(inet_frag_destroy); + +int inet_frag_evictor(struct inet_frags *f) +{ + struct inet_frag_queue *q; + int work, evicted = 0; + + work = atomic_read(&f->mem) - f->ctl->low_thresh; + while (work > 0) { + read_lock(&f->lock); + if (list_empty(&f->lru_list)) { + read_unlock(&f->lock); + break; + } + + q = list_first_entry(&f->lru_list, + struct inet_frag_queue, lru_list); + atomic_inc(&q->refcnt); + read_unlock(&f->lock); + + spin_lock(&q->lock); + if (!(q->last_in & COMPLETE)) + inet_frag_kill(q, f); + spin_unlock(&q->lock); + + if (atomic_dec_and_test(&q->refcnt)) + inet_frag_destroy(q, f, &work); + evicted++; + } + + return evicted; +} +EXPORT_SYMBOL(inet_frag_evictor); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index afbf938836f5..877da3ed52e2 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -40,7 +40,7 @@ #include <net/route.h> #include <net/xfrm.h> -static inline int ip_forward_finish(struct sk_buff *skb) +static int ip_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fabb86db763b..443b3f89192f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -39,6 +39,7 @@ #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> +#include <net/inet_frag.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> @@ -49,21 +50,8 @@ * as well. Or notify me, at least. --ANK */ -/* Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to measurably - * harm machine performance. - */ -int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; -int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; - int sysctl_ipfrag_max_dist __read_mostly = 64; -/* Important NOTE! Fragment queue must be destroyed before MSL expires. - * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. - */ -int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; - struct ipfrag_skb_cb { struct inet_skb_parm h; @@ -74,153 +62,102 @@ struct ipfrag_skb_cb /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; + u32 user; __be32 saddr; __be32 daddr; __be16 id; u8 protocol; - u8 last_in; -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 - - struct sk_buff *fragments; /* linked list of received fragments */ - int len; /* total length of original datagram */ - int meat; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* when will this queue expire? */ - ktime_t stamp; int iif; unsigned int rid; struct inet_peer *peer; }; -/* Hash table. */ +struct inet_frags_ctl ip4_frags_ctl __read_mostly = { + /* + * Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to + * measurably harm machine performance. + */ + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, -#define IPQ_HASHSZ 64 + /* + * Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival + * by TTL. + */ + .timeout = IP_FRAG_TIME, + .secret_interval = 10 * 60 * HZ, +}; -/* Per-bucket lock is easy to add now. */ -static struct hlist_head ipq_hash[IPQ_HASHSZ]; -static DEFINE_RWLOCK(ipfrag_lock); -static u32 ipfrag_hash_rnd; -static LIST_HEAD(ipq_lru_list); -int ip_frag_nqueues = 0; +static struct inet_frags ip4_frags; -static __inline__ void __ipq_unlink(struct ipq *qp) +int ip_frag_nqueues(void) { - hlist_del(&qp->list); - list_del(&qp->lru_list); - ip_frag_nqueues--; + return ip4_frags.nqueues; } -static __inline__ void ipq_unlink(struct ipq *ipq) +int ip_frag_mem(void) { - write_lock(&ipfrag_lock); - __ipq_unlink(ipq); - write_unlock(&ipfrag_lock); + return atomic_read(&ip4_frags.mem); } +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + struct net_device *dev); + static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, - ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); + ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); } -static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ipfrag_secret_rebuild(unsigned long dummy) +static unsigned int ip4_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; - - write_lock(&ipfrag_lock); - get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); - for (i = 0; i < IPQ_HASHSZ; i++) { - struct ipq *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { - unsigned int hval = ipqhashfn(q->id, q->saddr, - q->daddr, q->protocol); - - if (hval != i) { - hlist_del(&q->list); + struct ipq *ipq; - /* Relink to new hash chain. */ - hlist_add_head(&q->list, &ipq_hash[hval]); - } - } - } - write_unlock(&ipfrag_lock); - - mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); + ipq = container_of(q, struct ipq, q); + return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); } -atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ - /* Memory Tracking Functions. */ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip_frag_mem); + atomic_sub(skb->truesize, &ip4_frags.mem); kfree_skb(skb); } -static __inline__ void frag_free_queue(struct ipq *qp, int *work) +static __inline__ void ip4_frag_free(struct inet_frag_queue *q) { - if (work) - *work -= sizeof(struct ipq); - atomic_sub(sizeof(struct ipq), &ip_frag_mem); + struct ipq *qp; + + qp = container_of(q, struct ipq, q); + if (qp->peer) + inet_putpeer(qp->peer); kfree(qp); } static __inline__ struct ipq *frag_alloc_queue(void) { - struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); + struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC); if (!qp) return NULL; - atomic_add(sizeof(struct ipq), &ip_frag_mem); + atomic_add(sizeof(struct ipq), &ip4_frags.mem); return qp; } /* Destruction primitives. */ -/* Complete destruction of ipq. */ -static void ip_frag_destroy(struct ipq *qp, int *work) -{ - struct sk_buff *fp; - - BUG_TRAP(qp->last_in&COMPLETE); - BUG_TRAP(del_timer(&qp->timer) == 0); - - if (qp->peer) - inet_putpeer(qp->peer); - - /* Release all fragment data. */ - fp = qp->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - /* Finally, release the queue descriptor itself. */ - frag_free_queue(qp, work); -} - -static __inline__ void ipq_put(struct ipq *ipq, int *work) +static __inline__ void ipq_put(struct ipq *ipq) { - if (atomic_dec_and_test(&ipq->refcnt)) - ip_frag_destroy(ipq, work); + inet_frag_put(&ipq->q, &ip4_frags); } /* Kill ipq entry. It is not destroyed immediately, @@ -228,14 +165,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work) */ static void ipq_kill(struct ipq *ipq) { - if (del_timer(&ipq->timer)) - atomic_dec(&ipq->refcnt); - - if (!(ipq->last_in & COMPLETE)) { - ipq_unlink(ipq); - atomic_dec(&ipq->refcnt); - ipq->last_in |= COMPLETE; - } + inet_frag_kill(&ipq->q, &ip4_frags); } /* Memory limiting on fragments. Evictor trashes the oldest @@ -243,33 +173,11 @@ static void ipq_kill(struct ipq *ipq) */ static void ip_evictor(void) { - struct ipq *qp; - struct list_head *tmp; - int work; - - work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; - if (work <= 0) - return; - - while (work > 0) { - read_lock(&ipfrag_lock); - if (list_empty(&ipq_lru_list)) { - read_unlock(&ipfrag_lock); - return; - } - tmp = ipq_lru_list.next; - qp = list_entry(tmp, struct ipq, lru_list); - atomic_inc(&qp->refcnt); - read_unlock(&ipfrag_lock); + int evicted; - spin_lock(&qp->lock); - if (!(qp->last_in&COMPLETE)) - ipq_kill(qp); - spin_unlock(&qp->lock); - - ipq_put(qp, &work); - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - } + evicted = inet_frag_evictor(&ip4_frags); + if (evicted) + IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); } /* @@ -279,9 +187,9 @@ static void ip_expire(unsigned long arg) { struct ipq *qp = (struct ipq *) arg; - spin_lock(&qp->lock); + spin_lock(&qp->q.lock); - if (qp->last_in & COMPLETE) + if (qp->q.last_in & COMPLETE) goto out; ipq_kill(qp); @@ -289,8 +197,8 @@ static void ip_expire(unsigned long arg) IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { - struct sk_buff *head = qp->fragments; + if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { + struct sk_buff *head = qp->q.fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. */ if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); @@ -298,8 +206,8 @@ static void ip_expire(unsigned long arg) } } out: - spin_unlock(&qp->lock); - ipq_put(qp, NULL); + spin_unlock(&qp->q.lock); + ipq_put(qp); } /* Creation primitives. */ @@ -312,7 +220,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in) #endif unsigned int hash; - write_lock(&ipfrag_lock); + write_lock(&ip4_frags.lock); hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, qp_in->protocol); #ifdef CONFIG_SMP @@ -320,31 +228,31 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in) * such entry could be created on other cpu, while we * promoted read lock to write lock. */ - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { + hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) { if (qp->id == qp_in->id && qp->saddr == qp_in->saddr && qp->daddr == qp_in->daddr && qp->protocol == qp_in->protocol && qp->user == qp_in->user) { - atomic_inc(&qp->refcnt); - write_unlock(&ipfrag_lock); - qp_in->last_in |= COMPLETE; - ipq_put(qp_in, NULL); + atomic_inc(&qp->q.refcnt); + write_unlock(&ip4_frags.lock); + qp_in->q.last_in |= COMPLETE; + ipq_put(qp_in); return qp; } } #endif qp = qp_in; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) - atomic_inc(&qp->refcnt); + if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) + atomic_inc(&qp->q.refcnt); - atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &ipq_hash[hash]); - INIT_LIST_HEAD(&qp->lru_list); - list_add_tail(&qp->lru_list, &ipq_lru_list); - ip_frag_nqueues++; - write_unlock(&ipfrag_lock); + atomic_inc(&qp->q.refcnt); + hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]); + INIT_LIST_HEAD(&qp->q.lru_list); + list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list); + ip4_frags.nqueues++; + write_unlock(&ip4_frags.lock); return qp; } @@ -357,23 +265,18 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) goto out_nomem; qp->protocol = iph->protocol; - qp->last_in = 0; qp->id = iph->id; qp->saddr = iph->saddr; qp->daddr = iph->daddr; qp->user = user; - qp->len = 0; - qp->meat = 0; - qp->fragments = NULL; - qp->iif = 0; qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ - init_timer(&qp->timer); - qp->timer.data = (unsigned long) qp; /* pointer to queue */ - qp->timer.function = ip_expire; /* expire function */ - spin_lock_init(&qp->lock); - atomic_set(&qp->refcnt, 1); + init_timer(&qp->q.timer); + qp->q.timer.data = (unsigned long) qp; /* pointer to queue */ + qp->q.timer.function = ip_expire; /* expire function */ + spin_lock_init(&qp->q.lock); + atomic_set(&qp->q.refcnt, 1); return ip_frag_intern(qp); @@ -395,20 +298,20 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) struct ipq *qp; struct hlist_node *n; - read_lock(&ipfrag_lock); + read_lock(&ip4_frags.lock); hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { + hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) { if (qp->id == id && qp->saddr == saddr && qp->daddr == daddr && qp->protocol == protocol && qp->user == user) { - atomic_inc(&qp->refcnt); - read_unlock(&ipfrag_lock); + atomic_inc(&qp->q.refcnt); + read_unlock(&ip4_frags.lock); return qp; } } - read_unlock(&ipfrag_lock); + read_unlock(&ip4_frags.lock); return ip_frag_create(iph, user); } @@ -429,7 +332,7 @@ static inline int ip_frag_too_far(struct ipq *qp) end = atomic_inc_return(&peer->rid); qp->rid = end; - rc = qp->fragments && (end - start) > max; + rc = qp->q.fragments && (end - start) > max; if (rc) { IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); @@ -442,39 +345,42 @@ static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { - atomic_inc(&qp->refcnt); + if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) { + atomic_inc(&qp->q.refcnt); return -ETIMEDOUT; } - fp = qp->fragments; + fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; frag_kfree_skb(fp, NULL); fp = xp; } while (fp); - qp->last_in = 0; - qp->len = 0; - qp->meat = 0; - qp->fragments = NULL; + qp->q.last_in = 0; + qp->q.len = 0; + qp->q.meat = 0; + qp->q.fragments = NULL; qp->iif = 0; return 0; } /* Add new segment to existing queue. */ -static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) +static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct sk_buff *prev, *next; + struct net_device *dev; int flags, offset; int ihl, end; + int err = -ENOENT; - if (qp->last_in & COMPLETE) + if (qp->q.last_in & COMPLETE) goto err; if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && - unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + unlikely(ip_frag_too_far(qp)) && + unlikely(err = ip_frag_reinit(qp))) { ipq_kill(qp); goto err; } @@ -487,36 +393,40 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Determine the position of this fragment. */ end = offset + skb->len - ihl; + err = -EINVAL; /* Is this the final fragment? */ if ((flags & IP_MF) == 0) { /* If we already have some bits beyond end * or have different end, the segment is corrrupted. */ - if (end < qp->len || - ((qp->last_in & LAST_IN) && end != qp->len)) + if (end < qp->q.len || + ((qp->q.last_in & LAST_IN) && end != qp->q.len)) goto err; - qp->last_in |= LAST_IN; - qp->len = end; + qp->q.last_in |= LAST_IN; + qp->q.len = end; } else { if (end&7) { end &= ~7; if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_NONE; } - if (end > qp->len) { + if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ - if (qp->last_in & LAST_IN) + if (qp->q.last_in & LAST_IN) goto err; - qp->len = end; + qp->q.len = end; } } if (end == offset) goto err; + err = -ENOMEM; if (pskb_pull(skb, ihl) == NULL) goto err; - if (pskb_trim_rcsum(skb, end-offset)) + + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto err; /* Find out which fragments are in front and at the back of us @@ -524,7 +434,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * this fragment, right? */ prev = NULL; - for (next = qp->fragments; next != NULL; next = next->next) { + for (next = qp->q.fragments; next != NULL; next = next->next) { if (FRAG_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -539,8 +449,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (i > 0) { offset += i; + err = -EINVAL; if (end <= offset) goto err; + err = -ENOMEM; if (!pskb_pull(skb, i)) goto err; if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -548,6 +460,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } } + err = -ENOMEM; + while (next && FRAG_CB(next)->offset < end) { int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ @@ -558,7 +472,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (!pskb_pull(next, i)) goto err; FRAG_CB(next)->offset += i; - qp->meat -= i; + qp->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -573,9 +487,9 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = next; else - qp->fragments = next; + qp->q.fragments = next; - qp->meat -= free_it->len; + qp->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -587,50 +501,77 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = skb; else - qp->fragments = skb; - - if (skb->dev) - qp->iif = skb->dev->ifindex; - skb->dev = NULL; - qp->stamp = skb->tstamp; - qp->meat += skb->len; - atomic_add(skb->truesize, &ip_frag_mem); + qp->q.fragments = skb; + + dev = skb->dev; + if (dev) { + qp->iif = dev->ifindex; + skb->dev = NULL; + } + qp->q.stamp = skb->tstamp; + qp->q.meat += skb->len; + atomic_add(skb->truesize, &ip4_frags.mem); if (offset == 0) - qp->last_in |= FIRST_IN; + qp->q.last_in |= FIRST_IN; - write_lock(&ipfrag_lock); - list_move_tail(&qp->lru_list, &ipq_lru_list); - write_unlock(&ipfrag_lock); + if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) + return ip_frag_reasm(qp, prev, dev); - return; + write_lock(&ip4_frags.lock); + list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list); + write_unlock(&ip4_frags.lock); + return -EINPROGRESS; err: kfree_skb(skb); + return err; } /* Build a new IP datagram from all its fragments. */ -static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + struct net_device *dev) { struct iphdr *iph; - struct sk_buff *fp, *head = qp->fragments; + struct sk_buff *fp, *head = qp->q.fragments; int len; int ihlen; + int err; ipq_kill(qp); + /* Make the one we just received the head. */ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + + if (!fp) + goto out_nomem; + + fp->next = head->next; + prev->next = fp; + + skb_morph(head, qp->q.fragments); + head->next = qp->q.fragments->next; + + kfree_skb(qp->q.fragments); + qp->q.fragments = head; + } + BUG_TRAP(head != NULL); BUG_TRAP(FRAG_CB(head)->offset == 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); - len = ihlen + qp->len; + len = ihlen + qp->q.len; + err = -E2BIG; if (len > 65535) goto out_oversize; /* Head of list must not be cloned. */ + err = -ENOMEM; if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) goto out_nomem; @@ -654,12 +595,12 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip_frag_mem); + atomic_add(clone->truesize, &ip4_frags.mem); } skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip_frag_mem); + atomic_sub(head->truesize, &ip4_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -669,19 +610,19 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip_frag_mem); + atomic_sub(fp->truesize, &ip4_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = qp->stamp; + head->tstamp = qp->q.stamp; iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - qp->fragments = NULL; - return head; + qp->q.fragments = NULL; + return 0; out_nomem: LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " @@ -694,54 +635,46 @@ out_oversize: NIPQUAD(qp->saddr)); out_fail: IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - return NULL; + return err; } /* Process an incoming IP datagram fragment. */ -struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) +int ip_defrag(struct sk_buff *skb, u32 user) { struct ipq *qp; - struct net_device *dev; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. */ - if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) + if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh) ip_evictor(); - dev = skb->dev; - /* Lookup (or create) queue header */ if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { - struct sk_buff *ret = NULL; - - spin_lock(&qp->lock); + int ret; - ip_frag_queue(qp, skb); + spin_lock(&qp->q.lock); - if (qp->last_in == (FIRST_IN|LAST_IN) && - qp->meat == qp->len) - ret = ip_frag_reasm(qp, dev); + ret = ip_frag_queue(qp, skb); - spin_unlock(&qp->lock); - ipq_put(qp, NULL); + spin_unlock(&qp->q.lock); + ipq_put(qp); return ret; } IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); kfree_skb(skb); - return NULL; + return -ENOMEM; } void __init ipfrag_init(void) { - ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - init_timer(&ipfrag_secret_timer); - ipfrag_secret_timer.function = ipfrag_secret_rebuild; - ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; - add_timer(&ipfrag_secret_timer); + ip4_frags.ctl = &ip4_frags_ctl; + ip4_frags.hashfn = ip4_hashfn; + ip4_frags.destructor = ip4_frag_free; + ip4_frags.skb_free = NULL; + ip4_frags.qsize = sizeof(struct ipq); + inet_frags_init(&ip4_frags); } EXPORT_SYMBOL(ip_defrag); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 41d8964591e7..168c871fcd79 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -172,8 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); - if (skb == NULL) { + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); return 1; } @@ -196,7 +195,7 @@ int ip_call_ra_chain(struct sk_buff *skb) return 0; } -static inline int ip_local_deliver_finish(struct sk_buff *skb) +static int ip_local_deliver_finish(struct sk_buff *skb) { __skb_pull(skb, ip_hdrlen(skb)); @@ -265,8 +264,7 @@ int ip_local_deliver(struct sk_buff *skb) */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); - if (!skb) + if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } @@ -326,7 +324,7 @@ drop: return -1; } -static inline int ip_rcv_finish(struct sk_buff *skb) +static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 699f06781fd8..f508835ba713 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -202,7 +202,7 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb) skb->dst->dev->mtu : dst_mtu(skb->dst); } -static inline int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 341474eefa55..664cb8e97c1c 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -25,6 +25,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/netfilter.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> @@ -328,18 +329,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, spin_unlock(&cp->lock); } -static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, +static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(*pskb); + const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) + if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -360,7 +361,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, if (app->pkt_out == NULL) return 1; - if (!app->pkt_out(app, cp, pskb, &diff)) + if (!app->pkt_out(app, cp, skb, &diff)) return 0; /* @@ -378,7 +379,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, * called by ipvs packet handler, assumes previously checked cp!=NULL * returns false if it can't handle packet (oom) */ -int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) +int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *app; @@ -391,7 +392,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_out(cp, pskb, app); + return app_tcp_pkt_out(cp, skb, app); /* * Call private output hook function @@ -399,22 +400,22 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) if (app->pkt_out == NULL) return 1; - return app->pkt_out(app, cp, pskb, NULL); + return app->pkt_out(app, cp, skb, NULL); } -static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, +static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(*pskb); + const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) + if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -435,7 +436,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, if (app->pkt_in == NULL) return 1; - if (!app->pkt_in(app, cp, pskb, &diff)) + if (!app->pkt_in(app, cp, skb, &diff)) return 0; /* @@ -453,7 +454,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, * called by ipvs packet handler, assumes previously checked cp!=NULL. * returns false if can't handle packet (oom). */ -int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) +int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *app; @@ -466,7 +467,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_in(cp, pskb, app); + return app_tcp_pkt_in(cp, skb, app); /* * Call private input hook function @@ -474,7 +475,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) if (app->pkt_in == NULL) return 1; - return app->pkt_in(app, cp, pskb, NULL); + return app->pkt_in(app, cp, skb, NULL); } diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index fbca2a2ff29f..c6ed7654e839 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -EXPORT_SYMBOL(ip_vs_make_skb_writable); /* ID used in ICMP lookups */ @@ -163,42 +162,6 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction, } -int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) -{ - struct sk_buff *skb = *pskb; - - /* skb is already used, better copy skb and its payload */ - if (unlikely(skb_shared(skb) || skb->sk)) - goto copy_skb; - - /* skb data is already used, copy it */ - if (unlikely(skb_cloned(skb))) - goto copy_data; - - return pskb_may_pull(skb, writable_len); - - copy_data: - if (unlikely(writable_len > skb->len)) - return 0; - return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - - copy_skb: - if (unlikely(writable_len > skb->len)) - return 0; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) - return 0; - BUG_ON(skb_is_nonlinear(skb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(skb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = skb; - return 1; -} - /* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, @@ -525,12 +488,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * for VS/NAT. */ static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!((*pskb)->ipvs_property)) + if (!skb->ipvs_property) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ return NF_STOP; @@ -541,13 +504,14 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } -static inline struct sk_buff * -ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) +static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - skb = ip_defrag(skb, user); - if (skb) + int err = ip_defrag(skb, user); + + if (!err) ip_send_check(ip_hdr(skb)); - return skb; + + return err; } /* @@ -605,9 +569,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, * Currently handles error types - unreachable, quench, ttl exceeded. * (Only used in VS/NAT) */ -static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) +static int ip_vs_out_icmp(struct sk_buff *skb, int *related) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -619,10 +582,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); - if (!skb) + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; - *pskb = skb; } iph = ip_hdr(skb); @@ -690,9 +651,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); - if (!ip_vs_make_skb_writable(pskb, offset)) + if (!skb_make_writable(skb, offset)) goto out; - skb = *pskb; ip_vs_nat_icmp(skb, pp, cp, 1); @@ -724,11 +684,10 @@ static inline int is_tcp_reset(const struct sk_buff *skb) * rewrite addresses of the packet and send it on its way... */ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -741,11 +700,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_out_icmp(pskb, &related); + int related, verdict = ip_vs_out_icmp(skb, &related); if (related) return verdict; - skb = *pskb; iph = ip_hdr(skb); } @@ -756,11 +714,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, /* reassemble IP fragments */ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); - if (!skb) + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; iph = ip_hdr(skb); - *pskb = skb; } ihl = iph->ihl << 2; @@ -802,13 +758,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); - if (!ip_vs_make_skb_writable(pskb, ihl)) + if (!skb_make_writable(skb, ihl)) goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) goto drop; - skb = *pskb; ip_hdr(skb)->saddr = cp->vaddr; ip_send_check(ip_hdr(skb)); @@ -818,9 +773,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + if (ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; - skb = *pskb; IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); @@ -835,7 +789,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, drop: ip_vs_conn_put(cp); - kfree_skb(*pskb); + kfree_skb(skb); return NF_STOLEN; } @@ -847,9 +801,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, * Currently handles error types - unreachable, quench, ttl exceeded. */ static int -ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) +ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -861,12 +814,9 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_vs_gather_frags(skb, - hooknum == NF_IP_LOCAL_IN ? - IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); - if (!skb) + if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ? + IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) return NF_STOLEN; - *pskb = skb; } iph = ip_hdr(skb); @@ -945,11 +895,10 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) * and send it on its way... */ static unsigned int -ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -971,11 +920,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); + int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); if (related) return verdict; - skb = *pskb; iph = ip_hdr(skb); } @@ -1056,16 +1004,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { int r; - if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; - return ip_vs_in_icmp(pskb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, hooknum); } diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 344ddbbdc756..59aa166b7678 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -30,6 +30,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/netfilter.h> #include <net/protocol.h> #include <net/tcp.h> #include <asm/unaligned.h> @@ -135,7 +136,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. */ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff **pskb, int *diff) + struct sk_buff *skb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -155,14 +156,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. */ - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (cp->app_data == &ip_vs_ftp_pasv) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); data = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(*pskb); + data_limit = skb_tail_pointer(skb); if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, @@ -213,7 +214,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, memcpy(start, buf, buf_len); ret = 1; } else { - ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start, + ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, end-start, buf, buf_len); } @@ -238,7 +239,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * the client. */ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff **pskb, int *diff) + struct sk_buff *skb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -256,20 +257,20 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. */ - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; /* * Detecting whether it is passive */ - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); /* Since there may be OPTIONS in the TCP packet and the HLEN is the length of the header in 32-bit multiples, it is accurate to calculate data address by th+HLEN*4 */ data = data_start = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(*pskb); + data_limit = skb_tail_pointer(skb); while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index e65577a77006..12dc0d640b6d 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -20,6 +20,7 @@ #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ +#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> @@ -122,27 +123,27 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, static int -tcp_snat_handler(struct sk_buff **pskb, +tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(*pskb); + const unsigned int tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, pskb)) + if (!ip_vs_app_pkt_out(cp, skb)) return 0; } - tcph = (void *)ip_hdr(*pskb) + tcphoff; + tcph = (void *)ip_hdr(skb) + tcphoff; tcph->source = cp->vport; /* Adjust TCP checksums */ @@ -150,17 +151,15 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, tcphoff, - (*pskb)->len - tcphoff, 0); + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - (*pskb)->len - tcphoff, - cp->protocol, - (*pskb)->csum); + skb->len - tcphoff, + cp->protocol, skb->csum); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, (char*)&(tcph->check) - (char*)tcph); @@ -170,30 +169,30 @@ tcp_snat_handler(struct sk_buff **pskb, static int -tcp_dnat_handler(struct sk_buff **pskb, +tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(*pskb); + const unsigned int tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn and iph ack_seq stuff */ - if (!ip_vs_app_pkt_in(cp, pskb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } - tcph = (void *)ip_hdr(*pskb) + tcphoff; + tcph = (void *)ip_hdr(skb) + tcphoff; tcph->dest = cp->dport; /* @@ -203,18 +202,16 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, tcphoff, - (*pskb)->len - tcphoff, 0); + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - (*pskb)->len - tcphoff, - cp->protocol, - (*pskb)->csum); - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; + skb->len - tcphoff, + cp->protocol, skb->csum); + skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8ee5fe6a101d..1fa7b330b9ac 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -18,6 +18,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> +#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/udp.h> @@ -129,29 +130,29 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, } static int -udp_snat_handler(struct sk_buff **pskb, +udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - const unsigned int udphoff = ip_hdrlen(*pskb); + const unsigned int udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) + if (!skb_make_writable(skb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, pskb)) + if (!ip_vs_app_pkt_out(cp, skb)) return 0; } - udph = (void *)ip_hdr(*pskb) + udphoff; + udph = (void *)ip_hdr(skb) + udphoff; udph->source = cp->vport; /* @@ -161,17 +162,15 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, udphoff, - (*pskb)->len - udphoff, 0); + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - (*pskb)->len - udphoff, - cp->protocol, - (*pskb)->csum); + skb->len - udphoff, + cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -183,30 +182,30 @@ udp_snat_handler(struct sk_buff **pskb, static int -udp_dnat_handler(struct sk_buff **pskb, +udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - unsigned int udphoff = ip_hdrlen(*pskb); + unsigned int udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) + if (!skb_make_writable(skb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn */ - if (!ip_vs_app_pkt_in(cp, pskb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } - udph = (void *)ip_hdr(*pskb) + udphoff; + udph = (void *)ip_hdr(skb) + udphoff; udph->dest = cp->dport; /* @@ -216,20 +215,18 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, udphoff, - (*pskb)->len - udphoff, 0); + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - (*pskb)->len - udphoff, - cp->protocol, - (*pskb)->csum); + skb->len - udphoff, + cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 666e080a74a3..d0a92dec1050 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -253,7 +253,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) @@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dst = &rt->u.dst; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; ip_hdr(skb)->daddr = cp->daddr; ip_send_check(ip_hdr(skb)); @@ -529,7 +529,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!ip_vs_make_skb_writable(&skb, offset)) + if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index b44192924f95..5539debf4973 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -3,14 +3,15 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/ip.h> +#include <linux/skbuff.h> #include <net/route.h> #include <net/xfrm.h> #include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) +int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { - const struct iphdr *iph = ip_hdr(*pskb); + const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; struct dst_entry *odst; @@ -29,14 +30,14 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) if (type == RTN_LOCAL) fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; - fl.mark = (*pskb)->mark; + fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl.mark = skb->mark; if (ip_route_output_key(&rt, &fl) != 0) return -1; /* Drop old route. */ - dst_release((*pskb)->dst); - (*pskb)->dst = &rt->u.dst; + dst_release(skb->dst); + skb->dst = &rt->u.dst; } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ @@ -44,8 +45,8 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) if (ip_route_output_key(&rt, &fl) != 0) return -1; - odst = (*pskb)->dst; - if (ip_route_input(*pskb, iph->daddr, iph->saddr, + odst = skb->dst; + if (ip_route_input(skb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->u.dst.dev) != 0) { dst_release(&rt->u.dst); return -1; @@ -54,70 +55,54 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) dst_release(odst); } - if ((*pskb)->dst->error) + if (skb->dst->error) return -1; #ifdef CONFIG_XFRM - if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(*pskb, &fl, AF_INET) == 0) - if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) + if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET) == 0) + if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) return -1; #endif /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } + hh_len = skb->dst->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; return 0; } EXPORT_SYMBOL(ip_route_me_harder); #ifdef CONFIG_XFRM -int ip_xfrm_me_harder(struct sk_buff **pskb) +int ip_xfrm_me_harder(struct sk_buff *skb) { struct flowi fl; unsigned int hh_len; struct dst_entry *dst; - if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) + if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return 0; - if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) + if (xfrm_decode_session(skb, &fl, AF_INET) < 0) return -1; - dst = (*pskb)->dst; + dst = skb->dst; if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) + if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) return -1; - dst_release((*pskb)->dst); - (*pskb)->dst = dst; + dst_release(skb->dst); + skb->dst = dst; /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } + hh_len = skb->dst->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; return 0; } EXPORT_SYMBOL(ip_xfrm_me_harder); @@ -150,17 +135,17 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info) { const struct ip_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(*pskb); + const struct iphdr *iph = ip_hdr(skb); if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb, RTN_UNSPEC); + return ip_route_me_harder(skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 29114a9ccd1d..2909c92ecd99 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -197,7 +197,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff **pskb, +static unsigned int arpt_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -215,7 +215,7 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset) return (struct arpt_entry *)(base + offset); } -unsigned int arpt_do_table(struct sk_buff **pskb, +unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -231,9 +231,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb, struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + - (2 * (*pskb)->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, (sizeof(struct arphdr) + + (2 * skb->dev->addr_len) + + (2 * sizeof(u32))))) return NF_DROP; indev = in ? in->name : nulldevname; @@ -245,14 +245,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - arp = arp_hdr(*pskb); + arp = arp_hdr(skb); do { - if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { + if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { struct arpt_entry_target *t; int hdr_len; hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * (*pskb)->dev->addr_len); + (2 * skb->dev->addr_len); ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -290,14 +290,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb, /* Targets which reenter must return * abs. verdicts */ - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, t->data); /* Target might have changed stuff. */ - arp = arp_hdr(*pskb); + arp = arp_hdr(skb); if (verdict == ARPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index c4bdab47597f..45fa4e20094a 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -1,5 +1,6 @@ /* module that allows mangling of the arp payload */ #include <linux/module.h> +#include <linux/netfilter.h> #include <linux/netfilter_arp/arpt_mangle.h> #include <net/sock.h> @@ -8,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -18,47 +19,38 @@ target(struct sk_buff **pskb, unsigned char *arpptr; int pln, hln; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (skb_make_writable(skb, skb->len)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - arp = arp_hdr(*pskb); - arpptr = skb_network_header(*pskb) + sizeof(*arp); + arp = arp_hdr(skb); + arpptr = skb_network_header(skb) + sizeof(*arp); pln = arp->ar_pln; hln = arp->ar_hln; /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(*pskb))) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(*pskb))) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(*pskb))) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(*pskb))) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_t.tgt_ip, pln); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 75c023062533..302d3da5f696 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -56,12 +56,12 @@ static struct arpt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int arpt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter); + return arpt_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 23cbfc7c80fd..10a2ce09fd8e 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -335,6 +335,7 @@ static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; + int err; struct iphdr *user_iph = (struct iphdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -347,25 +348,18 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { - printk(KERN_WARNING "ip_queue: OOM " - "in mangle, dropping packet\n"); - return -ENOMEM; + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { + printk(KERN_WARNING "ip_queue: error " + "in mangle, dropping packet: %d\n", -err); + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6486894f450c..4b10b98640ac 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -169,7 +169,7 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff **pskb, +ipt_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -312,7 +312,7 @@ static void trace_packet(struct sk_buff *skb, /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ipt_do_table(struct sk_buff **pskb, +ipt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -331,8 +331,8 @@ ipt_do_table(struct sk_buff **pskb, struct xt_table_info *private; /* Initialization */ - ip = ip_hdr(*pskb); - datalen = (*pskb)->len - ip->ihl * 4; + ip = ip_hdr(skb); + datalen = skb->len - ip->ihl * 4; indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -359,7 +359,7 @@ ipt_do_table(struct sk_buff **pskb, struct ipt_entry_target *t; if (IPT_MATCH_ITERATE(e, do_match, - *pskb, in, out, + skb, in, out, offset, &hotdrop) != 0) goto no_match; @@ -371,8 +371,8 @@ ipt_do_table(struct sk_buff **pskb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely((*pskb)->nf_trace)) - trace_packet(*pskb, hook, in, out, + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, table->name, private, e); #endif /* Standard target? */ @@ -410,7 +410,7 @@ ipt_do_table(struct sk_buff **pskb, ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, @@ -428,8 +428,8 @@ ipt_do_table(struct sk_buff **pskb, = 0x57acc001; #endif /* Target might have changed stuff. */ - ip = ip_hdr(*pskb); - datalen = (*pskb)->len - ip->ihl * 4; + ip = ip_hdr(skb); + datalen = skb->len - ip->ihl * 4; if (verdict == IPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 27f14e1ebd8b..2f544dac72df 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -289,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -305,7 +305,7 @@ target(struct sk_buff **pskb, * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) { printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies @@ -316,7 +316,7 @@ target(struct sk_buff **pskb, /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) return XT_CONTINUE; @@ -325,7 +325,7 @@ target(struct sk_buff **pskb, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ - hash = clusterip_hashfn(*pskb, cipinfo->config); + hash = clusterip_hashfn(skb, cipinfo->config); switch (ctinfo) { case IP_CT_NEW: @@ -355,7 +355,7 @@ target(struct sk_buff **pskb, /* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ - (*pskb)->pkt_type = PACKET_HOST; + skb->pkt_type = PACKET_HOST; return XT_CONTINUE; } @@ -505,12 +505,12 @@ static void arp_print(struct arp_payload *payload) static unsigned int arp_mangle(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct arphdr *arp = arp_hdr(*pskb); + struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f1253bd3837f..add110060a22 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -26,15 +26,15 @@ MODULE_DESCRIPTION("iptables ECN modification module"); /* set ECT codepoint from IP header. * return false if there was an error. */ static inline bool -set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) +set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { __u8 oldtos; - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return false; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); @@ -45,14 +45,13 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) /* Return false if there was an error. */ static inline bool -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) +set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; __be16 oldval; /* Not enought header? */ - tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb), - sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) return false; @@ -62,9 +61,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) tcph->cwr == einfo->proto.tcp.cwr)) return true; - if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) return false; - tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb); + tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) @@ -72,13 +71,13 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, oldval, ((__be16 *)tcph)[6], 0); return true; } static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -88,12 +87,12 @@ target(struct sk_buff **pskb, const struct ipt_ECN_info *einfo = targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) - if (!set_ect_ip(pskb, einfo)) + if (!set_ect_ip(skb, einfo)) return NF_DROP; if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) - && ip_hdr(*pskb)->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo)) + && ip_hdr(skb)->protocol == IPPROTO_TCP) + if (!set_ect_tcp(skb, einfo)) return NF_DROP; return XT_CONTINUE; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 127a5e89bf14..4b5e8216a4e7 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -418,7 +418,7 @@ ipt_log_packet(unsigned int pf, } static unsigned int -ipt_log_target(struct sk_buff **pskb, +ipt_log_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, + ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, loginfo->prefix); return XT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 3e0b562b2db7..44b516e7cb79 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -52,7 +52,7 @@ masquerade_check(const char *tablename, } static unsigned int -masquerade_target(struct sk_buff **pskb, +masquerade_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -69,7 +69,7 @@ masquerade_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED @@ -82,7 +82,7 @@ masquerade_target(struct sk_buff **pskb, return NF_ACCEPT; mr = targinfo; - rt = (struct rtable *)(*pskb)->dst; + rt = (struct rtable *)skb->dst; newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { printk("MASQUERADE: %s ate my IP address\n", out->name); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 41a011d5a065..f8699291e33d 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -43,7 +43,7 @@ check(const char *tablename, } static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -59,14 +59,14 @@ target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) - new_ip = ip_hdr(*pskb)->daddr & ~netmask; + new_ip = ip_hdr(skb)->daddr & ~netmask; else - new_ip = ip_hdr(*pskb)->saddr & ~netmask; + new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; newrange = ((struct nf_nat_range) diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 6ac7a2373316..f7cf7d61a2d4 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -47,7 +47,7 @@ redirect_check(const char *tablename, } static unsigned int -redirect_target(struct sk_buff **pskb, +redirect_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -63,7 +63,7 @@ redirect_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ @@ -76,7 +76,7 @@ redirect_target(struct sk_buff **pskb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get_rcu((*pskb)->dev); + indev = __in_dev_get_rcu(skb->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index cb038c8fbc9d..dcf4d21d5116 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -131,7 +131,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) ) addr_type = RTN_LOCAL; - if (ip_route_me_harder(&nskb, addr_type)) + if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; nskb->ip_summed = CHECKSUM_NONE; @@ -162,7 +162,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } -static unsigned int reject(struct sk_buff **pskb, +static unsigned int reject(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -173,7 +173,7 @@ static unsigned int reject(struct sk_buff **pskb, /* Our naive response construction doesn't deal with IP options, and probably shouldn't try. */ - if (ip_hdrlen(*pskb) != sizeof(struct iphdr)) + if (ip_hdrlen(skb) != sizeof(struct iphdr)) return NF_DROP; /* WARNING: This code causes reentry within iptables. @@ -181,28 +181,28 @@ static unsigned int reject(struct sk_buff **pskb, must return an absolute verdict. --RR */ switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(*pskb, ICMP_NET_UNREACH); + send_unreach(skb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(*pskb, ICMP_HOST_UNREACH); + send_unreach(skb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(*pskb, ICMP_PROT_UNREACH); + send_unreach(skb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(*pskb, ICMP_PORT_UNREACH); + send_unreach(skb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(*pskb, ICMP_NET_ANO); + send_unreach(skb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(*pskb, ICMP_HOST_ANO); + send_unreach(skb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(*pskb, ICMP_PKT_FILTERED); + send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(*pskb, hooknum); + send_reset(skb, hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 97641f1a97f6..8988571436b8 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -104,7 +104,7 @@ same_destroy(const struct xt_target *target, void *targinfo) } static unsigned int -same_target(struct sk_buff **pskb, +same_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,7 +121,7 @@ same_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 25f5d0b39065..d4573baa7f27 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -21,7 +21,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables TOS mangling module"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,13 +29,13 @@ target(struct sk_buff **pskb, const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { __u8 oldtos; - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); oldtos = iph->tos; iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 2b54e7b0cfe8..c620a0527666 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("IP tables TTL modification module"); MODULE_LICENSE("GPL"); static unsigned int -ipt_ttl_target(struct sk_buff **pskb, +ipt_ttl_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -29,10 +29,10 @@ ipt_ttl_target(struct sk_buff **pskb, const struct ipt_TTL_info *info = targinfo; int new_ttl; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); switch (info->mode) { case IPT_TTL_SET: diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index c636d6d63574..212b830765a4 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -279,7 +279,7 @@ alloc_failure: spin_unlock_bh(&ulog_lock); } -static unsigned int ipt_ulog_target(struct sk_buff **pskb, +static unsigned int ipt_ulog_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -288,7 +288,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); + ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); return XT_CONTINUE; } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 4f51c1d7d2d6..ba3262c60437 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -62,31 +62,31 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, &packet_filter); } static unsigned int ipt_local_out_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_filter: ignoring short SOCK_RAW " "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 902446f7cbca..b4360a69d5ca 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -75,17 +75,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_route_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler); + return ipt_do_table(skb, hook, in, out, &packet_mangler); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -97,8 +97,8 @@ ipt_local_hook(unsigned int hook, u_int32_t mark; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_mangle: ignoring short SOCK_RAW " "packet.\n"); @@ -106,22 +106,22 @@ ipt_local_hook(unsigned int hook, } /* Save things which could affect route */ - mark = (*pskb)->mark; - iph = ip_hdr(*pskb); + mark = skb->mark; + iph = ip_hdr(skb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); + ret = ipt_do_table(skb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || - (*pskb)->mark != mark || + skb->mark != mark || iph->tos != tos) - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(skb, RTN_UNSPEC)) ret = NF_DROP; } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index d6e503395684..5de6e57ac55c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -47,30 +47,30 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, &packet_raw); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_raw: ignoring short SOCK_RAW" "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, &packet_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2fcb9249a8da..831e9b29806d 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -63,19 +63,20 @@ static int ipv4_print_conntrack(struct seq_file *s, } /* Returns new sk_buff, or NULL */ -static struct sk_buff * -nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { + int err; + skb_orphan(skb); local_bh_disable(); - skb = ip_defrag(skb, user); + err = ip_defrag(skb, user); local_bh_enable(); - if (skb) + if (!err) ip_send_check(ip_hdr(skb)); - return skb; + return err; } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, @@ -99,17 +100,17 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(pskb); + return nf_conntrack_confirm(skb); } static unsigned int ipv4_conntrack_help(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -120,7 +121,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) return NF_ACCEPT; @@ -131,56 +132,55 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; - return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb), + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if ((*pskb)->nfct) + if (skb->nfct) return NF_ACCEPT; /* Gather fragments. */ - if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) { - *pskb = nf_ct_ipv4_gather_frags(*pskb, - hooknum == NF_IP_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT); - if (!*pskb) + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_IP_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) return NF_STOLEN; } return NF_ACCEPT; } static unsigned int ipv4_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index bd93a1d71052..35a5aa69cd92 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("Amanda NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_amanda"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff **pskb, return NF_DROP; sprintf(buffer, "%u", port); - ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo, + ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 7221aa20e6ff..56e93f692e82 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -349,7 +349,7 @@ EXPORT_SYMBOL(nf_nat_setup_info); /* Returns true if succeeded. */ static int manip_pkt(u_int16_t proto, - struct sk_buff **pskb, + struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) @@ -357,19 +357,19 @@ manip_pkt(u_int16_t proto, struct iphdr *iph; struct nf_nat_protocol *p; - if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) return 0; - iph = (void *)(*pskb)->data + iphdroff; + iph = (void *)skb->data + iphdroff; /* Manipulate protcol part. */ /* rcu_read_lock()ed by nf_hook_slow */ p = __nf_nat_proto_find(proto); - if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) + if (!p->manip_pkt(skb, iphdroff, target, maniptype)) return 0; - iph = (void *)(*pskb)->data + iphdroff; + iph = (void *)skb->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); @@ -385,7 +385,7 @@ manip_pkt(u_int16_t proto, unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb) + struct sk_buff *skb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -407,7 +407,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) + if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) return NF_DROP; } return NF_ACCEPT; @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); int nf_nat_icmp_reply_translation(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb) + struct sk_buff *skb) { struct { struct icmphdr icmp; @@ -426,24 +426,24 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } *inside; struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple inner, target; - int hdrlen = ip_hdrlen(*pskb); + int hdrlen = ip_hdrlen(skb); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; - inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); + inside = (void *)skb->data + ip_hdrlen(skb); /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) return 0; /* Must be RELATED */ - NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || - (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || + skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be @@ -458,15 +458,15 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } pr_debug("icmp_reply_translation: translating error %p manip %u " - "dir %s\n", *pskb, manip, + "dir %s\n", skb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - if (!nf_ct_get_tuple(*pskb, - ip_hdrlen(*pskb) + sizeof(struct icmphdr), - (ip_hdrlen(*pskb) + + if (!nf_ct_get_tuple(skb, + ip_hdrlen(skb) + sizeof(struct icmphdr), + (ip_hdrlen(skb) + sizeof(struct icmphdr) + inside->ip.ihl * 4), (u_int16_t)AF_INET, inside->ip.protocol, @@ -478,19 +478,19 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, pass all hooks (locally-generated ICMP). Consider incoming packet: PREROUTING (DST manip), routing produces ICMP, goes through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, pskb, - ip_hdrlen(*pskb) + sizeof(inside->icmp), + if (!manip_pkt(inside->ip.protocol, skb, + ip_hdrlen(skb) + sizeof(inside->icmp), &ct->tuplehash[!dir].tuple, !manip)) return 0; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); + inside = (void *)skb->data + ip_hdrlen(skb); inside->icmp.checksum = 0; inside->icmp.checksum = - csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0)); + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); } /* Change outer to look the reply to an incoming packet @@ -506,7 +506,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, if (ct->status & statusbit) { nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, pskb, 0, &target, manip)) + if (!manip_pkt(0, skb, 0, &target, manip)) return 0; } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index 3663bd879c39..e1a16d3ea4cb 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip_nat_ftp"); /* FIXME: Time out? --RR */ static int -mangle_rfc959_packet(struct sk_buff **pskb, +mangle_rfc959_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -43,13 +43,13 @@ mangle_rfc959_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_eprt_packet(struct sk_buff **pskb, +mangle_eprt_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -63,13 +63,13 @@ mangle_eprt_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_epsv_packet(struct sk_buff **pskb, +mangle_epsv_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -83,11 +83,11 @@ mangle_epsv_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } -static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, +static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, unsigned int, unsigned int, struct nf_conn *, enum ip_conntrack_info) = { @@ -99,7 +99,7 @@ static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int nf_nat_ftp(struct sk_buff **pskb, +static unsigned int nf_nat_ftp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -132,7 +132,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, if (port == 0) return NF_DROP; - if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) { + if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c1b059a73708..a868c8c41328 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -22,12 +22,12 @@ #include <linux/netfilter/nf_conntrack_h323.h> /****************************************************************************/ -static int set_addr(struct sk_buff **pskb, +static int set_addr(struct sk_buff *skb, unsigned char **data, int dataoff, unsigned int addroff, __be32 ip, __be16 port) { enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo); + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct { __be32 ip; __be16 port; @@ -38,8 +38,8 @@ static int set_addr(struct sk_buff **pskb, buf.port = port; addroff += dataoff; - if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) { - if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -49,14 +49,13 @@ static int set_addr(struct sk_buff **pskb, } /* Relocate data pointer */ - th = skb_header_pointer(*pskb, ip_hdrlen(*pskb), + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (th == NULL) return -1; - *data = (*pskb)->data + ip_hdrlen(*pskb) + - th->doff * 4 + dataoff; + *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; } else { - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -67,36 +66,35 @@ static int set_addr(struct sk_buff **pskb, /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy * or pull everything in a linear buffer, so we can safely * use the skb pointers now */ - *data = ((*pskb)->data + ip_hdrlen(*pskb) + - sizeof(struct udphdr)); + *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); } return 0; } /****************************************************************************/ -static int set_h225_addr(struct sk_buff **pskb, +static int set_h225_addr(struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(pskb, data, dataoff, taddr->ipAddress.ip, + return set_addr(skb, data, dataoff, taddr->ipAddress.ip, addr->ip, port); } /****************************************************************************/ -static int set_h245_addr(struct sk_buff **pskb, +static int set_h245_addr(struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(pskb, data, dataoff, + return set_addr(skb, data, dataoff, taddr->unicastAddress.iPAddress.network, addr->ip, port); } /****************************************************************************/ -static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, +static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -125,7 +123,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), info->sig_port[!dir]); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.dst.u3, info->sig_port[!dir]); @@ -137,7 +135,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), info->sig_port[!dir]); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, info->sig_port[!dir]); @@ -149,7 +147,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, +static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -168,7 +166,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), ntohs(port), NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. dst.u.udp.port); @@ -179,7 +177,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, @@ -244,7 +242,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(pskb, data, dataoff, taddr, + if (set_h245_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? nated_port + 1 : nated_port)) == 0) { @@ -273,7 +271,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, @@ -301,7 +299,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(pskb, data, dataoff, taddr, + if (set_h245_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { nf_ct_unexpect_related(exp); @@ -318,7 +316,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -351,7 +349,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(pskb, data, dataoff, taddr, + if (set_h225_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -406,7 +404,7 @@ static void ip_nat_q931_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) @@ -439,7 +437,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(pskb, data, 0, &taddr[idx], + if (set_h225_addr(skb, data, 0, &taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -450,7 +448,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, if (idx > 0 && get_h225_addr(ct, *data, &taddr[0], &addr, &port) && (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(pskb, data, 0, &taddr[0], + set_h225_addr(skb, data, 0, &taddr[0], &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); } @@ -495,7 +493,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -525,7 +523,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(pskb, data, dataoff, taddr, + if (!set_h225_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 93d8a0a8f035..8718da00ef2a 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -111,22 +111,14 @@ static void mangle_contents(struct sk_buff *skb, } /* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) +static int enlarge_skb(struct sk_buff *skb, unsigned int extra) { - struct sk_buff *nskb; - - if ((*pskb)->len + extra > 65535) + if (skb->len + extra > 65535) return 0; - nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC); - if (!nskb) + if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) return 0; - /* Transfer socket to new skb. */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; return 1; } @@ -139,7 +131,7 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) * * */ int -nf_nat_mangle_tcp_packet(struct sk_buff **pskb, +nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -147,37 +139,37 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; struct tcphdr *tcph; int oldlen, datalen; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(*pskb) && - !enlarge_skb(pskb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) return 0; - SKB_LINEAR_ASSERT(*pskb); + SKB_LINEAR_ASSERT(skb); - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); tcph = (void *)iph + iph->ihl*4; - oldlen = (*pskb)->len - iph->ihl*4; - mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, + oldlen = skb->len - iph->ihl*4; + mangle_contents(skb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); - datalen = (*pskb)->len - iph->ihl*4; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + datalen = skb->len - iph->ihl*4; + if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_V4_CSUM) { - (*pskb)->ip_summed = CHECKSUM_PARTIAL; - (*pskb)->csum_start = skb_headroom(*pskb) + - skb_network_offset(*pskb) + - iph->ihl * 4; - (*pskb)->csum_offset = offsetof(struct tcphdr, check); + skb->dev->features & NETIF_F_V4_CSUM) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + iph->ihl * 4; + skb->csum_offset = offsetof(struct tcphdr, check); tcph->check = ~tcp_v4_check(datalen, iph->saddr, iph->daddr, 0); } else { @@ -188,7 +180,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen, 0)); } } else - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(oldlen), htons(datalen), 1); if (rep_len != match_len) { @@ -197,7 +189,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, (int)rep_len - (int)match_len, ct, ctinfo); /* Tell TCP window tracking about seq change */ - nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); } return 1; @@ -215,7 +207,7 @@ EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); * should be fairly easy to do. */ int -nf_nat_mangle_udp_packet(struct sk_buff **pskb, +nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -223,48 +215,48 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; struct udphdr *udph; int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ - iph = ip_hdr(*pskb); - if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + + iph = ip_hdr(skb); + if (skb->len < iph->ihl*4 + sizeof(*udph) + match_offset + match_len) return 0; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(*pskb) && - !enlarge_skb(pskb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) return 0; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); udph = (void *)iph + iph->ihl*4; - oldlen = (*pskb)->len - iph->ihl*4; - mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), + oldlen = skb->len - iph->ihl*4; + mangle_contents(skb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - datalen = (*pskb)->len - iph->ihl*4; + datalen = skb->len - iph->ihl*4; udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) return 1; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_V4_CSUM) { - (*pskb)->ip_summed = CHECKSUM_PARTIAL; - (*pskb)->csum_start = skb_headroom(*pskb) + - skb_network_offset(*pskb) + - iph->ihl * 4; - (*pskb)->csum_offset = offsetof(struct udphdr, check); + skb->dev->features & NETIF_F_V4_CSUM) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + iph->ihl * 4; + skb->csum_offset = offsetof(struct udphdr, check); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, 0); @@ -278,7 +270,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, udph->check = CSUM_MANGLED_0; } } else - nf_proto_csum_replace2(&udph->check, *pskb, + nf_proto_csum_replace2(&udph->check, skb, htons(oldlen), htons(datalen), 1); return 1; @@ -330,7 +322,7 @@ sack_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static inline unsigned int -nf_nat_sack_adjust(struct sk_buff **pskb, +nf_nat_sack_adjust(struct sk_buff *skb, struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -338,17 +330,17 @@ nf_nat_sack_adjust(struct sk_buff **pskb, unsigned int dir, optoff, optend; struct nf_conn_nat *nat = nfct_nat(ct); - optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr); - optend = ip_hdrlen(*pskb) + tcph->doff * 4; + optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); + optend = ip_hdrlen(skb) + tcph->doff * 4; - if (!skb_make_writable(pskb, optend)) + if (!skb_make_writable(skb, optend)) return 0; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { /* Usually: option, length. */ - unsigned char *op = (*pskb)->data + optoff; + unsigned char *op = skb->data + optoff; switch (op[0]) { case TCPOPT_EOL: @@ -365,7 +357,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, if (op[0] == TCPOPT_SACK && op[1] >= 2+TCPOLEN_SACK_PERBLOCK && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(*pskb, tcph, optoff+2, + sack_adjust(skb, tcph, optoff+2, optoff+op[1], &nat->seq[!dir]); optoff += op[1]; } @@ -375,7 +367,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ int -nf_nat_seq_adjust(struct sk_buff **pskb, +nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -390,10 +382,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb, this_way = &nat->seq[dir]; other_way = &nat->seq[!dir]; - if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) return 0; - tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb); + tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else @@ -405,8 +397,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb, else newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); - nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); + nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); + nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), @@ -415,10 +407,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb, tcph->seq = newseq; tcph->ack_seq = newack; - if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo)) + if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); return 1; } diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index bcf274bba602..766e2c16c6b9 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_irc"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -58,7 +58,7 @@ static unsigned int help(struct sk_buff **pskb, pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", buffer, NIPQUAD(ip), port); - ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, + ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 984ec8308b2e..e1385a099079 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -110,7 +110,7 @@ static void pptp_nat_expected(struct nf_conn *ct, /* outbound packets == from PNS to PAC */ static int -pptp_outbound_pkt(struct sk_buff **pskb, +pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, cid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_callid), (char *)&new_callid, @@ -213,7 +213,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, /* inbound packets == from PAC to PNS */ static int -pptp_inbound_pkt(struct sk_buff **pskb, +pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -268,7 +268,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, pcid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_pcid), (char *)&new_pcid, diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d562290b1820..b820f9960356 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -98,21 +98,21 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* manipulate a GRE packet according to maniptype */ static int -gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, +gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ - if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8)) + if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) return 0; - greh = (void *)(*pskb)->data + hdroff; + greh = (void *)skb->data + hdroff; pgreh = (struct gre_hdr_pptp *)greh; /* we only have destination manip of a packet, since 'source key' diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 898d73771155..b9fc724388fc 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -52,20 +52,20 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -icmp_manip_pkt(struct sk_buff **pskb, +icmp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return 0; - hdr = (struct icmphdr *)((*pskb)->data + hdroff); - nf_proto_csum_replace2(&hdr->checksum, *pskb, + hdr = (struct icmphdr *)(skb->data + hdroff); + nf_proto_csum_replace2(&hdr->checksum, skb, hdr->un.echo.id, tuple->src.u.icmp.id, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 5bbbb2acdc70..6bab2e184455 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -88,12 +88,12 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -tcp_manip_pkt(struct sk_buff **pskb, +tcp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; @@ -103,14 +103,14 @@ tcp_manip_pkt(struct sk_buff **pskb, /* this could be a inner header returned in icmp packet; in such cases we cannot update the checksum field since it is outside of the 8 bytes of transport layer headers we are guaranteed */ - if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) + if (skb->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_make_writable(pskb, hdroff + hdrsize)) + if (!skb_make_writable(skb, hdroff + hdrsize)) return 0; - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct tcphdr *)((*pskb)->data + hdroff); + iph = (struct iphdr *)(skb->data + iphdroff); + hdr = (struct tcphdr *)(skb->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -132,8 +132,8 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); + nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); return 1; } diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index a0af4fd95584..cbf1a61e2908 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -86,22 +86,22 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -udp_manip_pkt(struct sk_buff **pskb, +udp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; __be16 *portptr, newport; - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return 0; - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct udphdr *)((*pskb)->data + hdroff); + iph = (struct iphdr *)(skb->data + iphdroff); + hdr = (struct udphdr *)(skb->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -116,9 +116,9 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { - nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, + if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); if (!hdr->check) hdr->check = CSUM_MANGLED_0; diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index f50d0203f9c0..cfd2742e9706 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -37,7 +37,7 @@ static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -unknown_manip_pkt(struct sk_buff **pskb, +unknown_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 76ec59ae524d..46b25ab5f78b 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -65,7 +65,7 @@ static struct xt_table nat_table = { }; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff **pskb, +static unsigned int ipt_snat_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -78,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || @@ -107,7 +107,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff **pskb, +static unsigned int ipt_dnat_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,14 +121,14 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); if (hooknum == NF_IP_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(*pskb)->daddr, + warn_if_extra_mangle(ip_hdr(skb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], hooknum); @@ -204,7 +204,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) return nf_nat_setup_info(ct, &range, hooknum); } -int nf_nat_rule_find(struct sk_buff **pskb, +int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, @@ -212,7 +212,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); + ret = ipt_do_table(skb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e14d41976c27..ce9edbcc01e3 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -60,7 +60,7 @@ static void addr_map_init(struct nf_conn *ct, struct addr_map *map) } } -static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, +static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, enum sip_header_pos pos, struct addr_map *map) { @@ -84,15 +84,15 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, } else return 1; - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, addr, addrlen)) return 0; - *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); + *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); return 1; } -static unsigned int ip_nat_sip(struct sk_buff **pskb, +static unsigned int ip_nat_sip(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) @@ -101,8 +101,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, struct addr_map map; int dataoff, datalen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); - datalen = (*pskb)->len - dataoff; + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); + datalen = skb->len - dataoff; if (datalen < sizeof("SIP/2.0") - 1) return NF_ACCEPT; @@ -121,19 +121,19 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, else pos = POS_REQ_URI; - if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map)) + if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) return NF_DROP; } - if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) + if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) return NF_DROP; return NF_ACCEPT; } -static unsigned int mangle_sip_packet(struct sk_buff **pskb, +static unsigned int mangle_sip_packet(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, @@ -145,16 +145,16 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb, if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) return 0; - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, bufflen)) return 0; /* We need to reload this. Thanks Patrick. */ - *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); + *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); return 1; } -static int mangle_content_len(struct sk_buff **pskb, +static int mangle_content_len(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char *dptr) @@ -163,22 +163,22 @@ static int mangle_content_len(struct sk_buff **pskb, char buffer[sizeof("65536")]; int bufflen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); /* Get actual SDP lenght */ - if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, &matchlen, POS_SDP_HEADER) > 0) { /* since ct_sip_get_info() give us a pointer passing 'v=' we need to add 2 bytes in this count. */ - int c_len = (*pskb)->len - dataoff - matchoff + 2; + int c_len = skb->len - dataoff - matchoff + 2; /* Now, update SDP length */ - if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, &matchlen, POS_CONTENT) > 0) { bufflen = sprintf(buffer, "%u", c_len); - return nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + return nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, bufflen); } @@ -186,7 +186,7 @@ static int mangle_content_len(struct sk_buff **pskb, return 0; } -static unsigned int mangle_sdp(struct sk_buff **pskb, +static unsigned int mangle_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, __be32 newip, u_int16_t port, @@ -195,25 +195,25 @@ static unsigned int mangle_sdp(struct sk_buff **pskb, char buffer[sizeof("nnn.nnn.nnn.nnn")]; unsigned int dataoff, bufflen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); /* Mangle owner and contact info. */ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_OWNER_IP4)) return 0; - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_CONNECTION_IP4)) return 0; /* Mangle media port. */ bufflen = sprintf(buffer, "%u", port); - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_MEDIA)) return 0; - return mangle_content_len(pskb, ctinfo, ct, dptr); + return mangle_content_len(skb, ctinfo, ct, dptr); } static void ip_nat_sdp_expect(struct nf_conn *ct, @@ -241,7 +241,7 @@ static void ip_nat_sdp_expect(struct nf_conn *ct, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp(struct sk_buff **pskb, +static unsigned int ip_nat_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) @@ -277,7 +277,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, if (port == 0) return NF_DROP; - if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { + if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 6bfcd3a90f08..03709d6b4b06 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1188,9 +1188,9 @@ static int snmp_parse_mangle(unsigned char *msg, */ static int snmp_translate(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct sk_buff **pskb) + struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); @@ -1225,13 +1225,13 @@ static int snmp_translate(struct nf_conn *ct, /* We don't actually set up expectations, just adjust internal IP * addresses if this is being NATted */ -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { int dir = CTINFO2DIR(ctinfo); unsigned int ret; - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); /* SNMP replies and originating SNMP traps get mangled */ @@ -1250,7 +1250,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, * enough room for a UDP header. Just verify the UDP length field so we * can mess around with the payload. */ - if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) { + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { if (net_ratelimit()) printk(KERN_WARNING "SNMP: dropping malformed packet " "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", @@ -1258,11 +1258,11 @@ static int help(struct sk_buff **pskb, unsigned int protoff, return NF_DROP; } - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; spin_lock_bh(&snmp_lock); - ret = snmp_translate(ct, ctinfo, pskb); + ret = snmp_translate(ct, ctinfo, skb); spin_unlock_bh(&snmp_lock); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 46cc99def165..7db76ea9af91 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -67,7 +67,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) static unsigned int nf_nat_fn(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -80,9 +80,9 @@ nf_nat_fn(unsigned int hooknum, /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. */ - NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to packet filter it out, or implement conntrack/NAT for that @@ -91,10 +91,10 @@ nf_nat_fn(unsigned int hooknum, /* Exception: ICMP redirect to new connection (not in hash table yet). We must not let this through, in case we're doing NAT to the same network. */ - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { struct icmphdr _hdr, *hp; - hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb), + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp != NULL && hp->type == ICMP_REDIRECT) @@ -119,9 +119,9 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, - hooknum, pskb)) + hooknum, skb)) return NF_DROP; else return NF_ACCEPT; @@ -141,7 +141,7 @@ nf_nat_fn(unsigned int hooknum, /* LOCAL_IN hook doesn't have a chain! */ ret = alloc_null_binding(ct, hooknum); else - ret = nf_nat_rule_find(pskb, hooknum, in, out, + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) { @@ -159,31 +159,31 @@ nf_nat_fn(unsigned int hooknum, ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); } - return nf_nat_packet(ct, ctinfo, hooknum, pskb); + return nf_nat_packet(ct, ctinfo, hooknum, skb); } static unsigned int nf_nat_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { unsigned int ret; - __be32 daddr = ip_hdr(*pskb)->daddr; + __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(*pskb)->daddr) { - dst_release((*pskb)->dst); - (*pskb)->dst = NULL; + daddr != ip_hdr(skb)->daddr) { + dst_release(skb->dst); + skb->dst = NULL; } return ret; } static unsigned int nf_nat_out(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -195,14 +195,14 @@ nf_nat_out(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.u3.ip != @@ -210,7 +210,7 @@ nf_nat_out(unsigned int hooknum, || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all ) - return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; + return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; } #endif return ret; @@ -218,7 +218,7 @@ nf_nat_out(unsigned int hooknum, static unsigned int nf_nat_local_fn(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -228,24 +228,24 @@ nf_nat_local_fn(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(skb, RTN_UNSPEC)) ret = NF_DROP; } #ifdef CONFIG_XFRM else if (ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) - if (ip_xfrm_me_harder(pskb)) + if (ip_xfrm_me_harder(skb)) ret = NF_DROP; #endif } @@ -254,7 +254,7 @@ nf_nat_local_fn(unsigned int hooknum, static unsigned int nf_nat_adjust(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -262,10 +262,10 @@ nf_nat_adjust(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { pr_debug("nf_nat_standalone: adjusting sequence number\n"); - if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) + if (!nf_nat_seq_adjust(skb, ct, ctinfo)) return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 04dfeaefec02..0ecec701cb44 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("TFTP NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_tftp"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e5b05b039101..fd16cb8f8abe 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -70,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, - atomic_read(&ip_frag_mem)); + seq_printf(seq, "FRAG: inuse %d memory %d\n", + ip_frag_nqueues(), ip_frag_mem()); return 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index eb286abcf5dc..c98ef16effd2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -19,6 +19,7 @@ #include <net/route.h> #include <net/tcp.h> #include <net/cipso_ipv4.h> +#include <net/inet_frag.h> /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -357,7 +358,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", - .data = &sysctl_ipfrag_high_thresh, + .data = &ip4_frags_ctl.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -365,7 +366,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, .procname = "ipfrag_low_thresh", - .data = &sysctl_ipfrag_low_thresh, + .data = &ip4_frags_ctl.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -381,7 +382,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_TIME, .procname = "ipfrag_time", - .data = &sysctl_ipfrag_time, + .data = &ip4_frags_ctl.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -732,7 +733,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", - .data = &sysctl_ipfrag_secret_interval, + .data = &ip4_frags_ctl.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0a42e9340346..0f00966b1784 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1995,8 +1995,7 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, } /* Mark head of queue up as lost. */ -static void tcp_mark_head_lost(struct sock *sk, - int packets, u32 high_seq) +static void tcp_mark_head_lost(struct sock *sk, int packets) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -2019,7 +2018,7 @@ static void tcp_mark_head_lost(struct sock *sk, tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; cnt += tcp_skb_pcount(skb); - if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -2040,9 +2039,9 @@ static void tcp_update_scoreboard(struct sock *sk) int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; - tcp_mark_head_lost(sk, lost, tp->high_seq); + tcp_mark_head_lost(sk, lost); } else { - tcp_mark_head_lost(sk, 1, tp->high_seq); + tcp_mark_head_lost(sk, 1); } /* New heuristics: it is possible only after we switched @@ -2381,7 +2380,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq); + tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 434ef302ba83..a4edd666318b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -78,7 +78,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -86,7 +86,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm4_output_finish2); if (unlikely(err != 1)) break; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index c82d4d49f71f..1e89efd38a0c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv); struct tlvtype_proc { int type; - int (*func)(struct sk_buff **skbp, int offset); + int (*func)(struct sk_buff *skb, int offset); }; /********************* @@ -111,10 +111,8 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; - switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -139,9 +137,8 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct tlvtype_proc *curr; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); @@ -172,13 +169,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skbp, off) == 0) + if (curr->func(skb, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skbp, off) == 0) + if (ip6_tlvopt_unknown(skb, off) == 0) return 0; } break; @@ -198,9 +195,8 @@ bad: *****************************/ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) +static int ipv6_dest_hao(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -234,22 +230,13 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) goto discard; if (skb_cloned(skb)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); - struct inet6_skb_parm *opt2; - - if (skb2 == NULL) + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto discard; - opt2 = IP6CB(skb2); - memcpy(opt2, opt, sizeof(*opt2)); - - kfree_skb(skb); - /* update all variable using below by copied skbuff */ - *skbp = skb = skb2; - hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) + + hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); - ipv6h = ipv6_hdr(skb2); + ipv6h = ipv6_hdr(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -280,9 +267,8 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff **skbp) +static int ipv6_destopt_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; @@ -304,9 +290,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) #endif dst = dst_clone(skb->dst); - if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { + if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { dst_release(dst); - skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -337,10 +322,8 @@ void __init ipv6_destopt_init(void) NONE header. No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff **skbp) +static int ipv6_nodata_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; - kfree_skb(skb); return 0; } @@ -360,9 +343,8 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff **skbp) +static int ipv6_rthdr_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr = NULL; struct in6_addr daddr; @@ -464,18 +446,14 @@ looped_back: Do not damage packets queued somewhere. */ if (skb_cloned(skb)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); /* the copy is a forwarded packet */ - if (skb2 == NULL) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } - kfree_skb(skb); - *skbp = skb = skb2; - opt = IP6CB(skb2); - hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2); + hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -578,9 +556,8 @@ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) +static int ipv6_hop_ra(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { @@ -595,9 +572,8 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) +static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); u32 pkt_len; @@ -648,9 +624,8 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff **skbp) +int ipv6_parse_hopopts(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -667,8 +642,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { - skb = *skbp; + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 47b8ce232e84..9bb031fa1c2f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -82,7 +82,7 @@ EXPORT_SYMBOL(icmpv6msg_statistics); static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff **pskb); +static int icmpv6_rcv(struct sk_buff *skb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -614,9 +614,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff **pskb) +static int icmpv6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct in6_addr *saddr, *daddr; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 25b931709749..78de42ada844 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -146,7 +146,7 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, __ip6_dst_store(sk, dst, daddr, saddr); #ifdef CONFIG_XFRM - if (dst) { + { struct rt6_info *rt = (struct rt6_info *)dst; rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9149fc239759..fac6f7f9dd73 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -125,7 +125,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(&skb) < 0) { + if (ipv6_parse_hopopts(skb) < 0) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; @@ -149,7 +149,7 @@ out: */ -static inline int ip6_input_finish(struct sk_buff *skb) +static int ip6_input_finish(struct sk_buff *skb) { struct inet6_protocol *ipprot; struct sock *raw_sk; @@ -199,7 +199,7 @@ resubmit: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(&skb); + ret = ipprot->handler(skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 011082ed921a..13565dfb1b45 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -70,7 +70,7 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f spin_unlock_bh(&ip6_id_lock); } -static inline int ip6_output_finish(struct sk_buff *skb) +static int ip6_output_finish(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 38b149613915..b1326c2bf8aa 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -68,15 +68,15 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info) +static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) { struct ip6_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP6_LOCAL_OUT) { - struct ipv6hdr *iph = ipv6_hdr(*pskb); + struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) - return ip6_route_me_harder(*pskb); + return ip6_route_me_harder(skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 0473145ac534..6413a30d9f68 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -332,6 +332,7 @@ static int ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; + int err; struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -344,25 +345,18 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { printk(KERN_WARNING "ip6_queue: OOM " "in mangle, dropping packet\n"); - return -ENOMEM; + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cd9df02bb85c..acaba1537931 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -205,7 +205,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff **pskb, +ip6t_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -350,7 +350,7 @@ static void trace_packet(struct sk_buff *skb, /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ip6t_do_table(struct sk_buff **pskb, +ip6t_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -389,17 +389,17 @@ ip6t_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, + if (ip6_packet_match(skb, indev, outdev, &e->ipv6, &protoff, &offset, &hotdrop)) { struct ip6t_entry_target *t; if (IP6T_MATCH_ITERATE(e, do_match, - *pskb, in, out, + skb, in, out, offset, protoff, &hotdrop) != 0) goto no_match; ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(*pskb)->payload_len) + ntohs(ipv6_hdr(skb)->payload_len) + IPV6_HDR_LEN, 1); @@ -409,8 +409,8 @@ ip6t_do_table(struct sk_buff **pskb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely((*pskb)->nf_trace)) - trace_packet(*pskb, hook, in, out, + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, table->name, private, e); #endif /* Standard target? */ @@ -448,7 +448,7 @@ ip6t_do_table(struct sk_buff **pskb, ((struct ip6t_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index ad4d94310b87..9afc836fd454 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -18,7 +18,7 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); MODULE_LICENSE("GPL"); -static unsigned int ip6t_hl_target(struct sk_buff **pskb, +static unsigned int ip6t_hl_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,10 +29,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct ip6t_HL_info *info = targinfo; int new_hl; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; - ip6h = ipv6_hdr(*pskb); + ip6h = ipv6_hdr(skb); switch (info->mode) { case IP6T_HL_SET: diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 6ab99001dccc..7a48c342df46 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -431,7 +431,7 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -ip6t_log_target(struct sk_buff **pskb, +ip6t_log_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -445,8 +445,7 @@ ip6t_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, - loginfo->prefix); + ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix); return XT_CONTINUE; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 3fd08d5567a6..1a7d2917545d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -172,7 +172,7 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } -static unsigned int reject6_target(struct sk_buff **pskb, +static unsigned int reject6_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -187,25 +187,25 @@ static unsigned int reject6_target(struct sk_buff **pskb, must return an absolute verdict. --RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); + send_unreach(skb, ICMPV6_NOROUTE, hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); + send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); + send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); + send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); + send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(*pskb); + send_reset(skb); break; default: if (net_ratelimit()) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 7e32e2aaf7f7..1d26b202bf30 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,32 +60,32 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, &packet_filter); } static unsigned int ip6t_local_out_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { #if 0 /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; } #endif - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index f0a9efa67fb5..a0b6381f1e8c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -68,17 +68,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_route_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_mangler); + return ip6t_do_table(skb, hook, in, out, &packet_mangler); } static unsigned int ip6t_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -91,8 +91,8 @@ ip6t_local_hook(unsigned int hook, #if 0 /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; @@ -100,22 +100,22 @@ ip6t_local_hook(unsigned int hook, #endif /* save source/dest address, mark, hoplimit, flowlabel, priority, */ - memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr)); - mark = (*pskb)->mark; - hop_limit = ipv6_hdr(*pskb)->hop_limit; + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u_int32_t *)ipv6_hdr(*pskb)); + flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); + ret = ip6t_do_table(skb, hook, in, out, &packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN - && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr)) - || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr)) - || (*pskb)->mark != mark - || ipv6_hdr(*pskb)->hop_limit != hop_limit)) - return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; + && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) + || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) + || skb->mark != mark + || ipv6_hdr(skb)->hop_limit != hop_limit)) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; return ret; } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ec290e4ebdd8..8f7109f991e6 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -46,12 +46,12 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_raw); + return ip6t_do_table(skb, hook, in, out, &packet_raw); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 37a3db926953..0e40948f4fc6 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -18,6 +18,7 @@ #include <linux/icmp.h> #include <linux/sysctl.h> #include <net/ipv6.h> +#include <net/inet_frag.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack.h> @@ -145,7 +146,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -155,12 +156,12 @@ static unsigned int ipv6_confirm(unsigned int hooknum, struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; unsigned int ret, protoff; - unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; - unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; + unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) goto out; @@ -172,23 +173,23 @@ static unsigned int ipv6_confirm(unsigned int hooknum, if (!helper) goto out; - protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, - (*pskb)->len - extoff); - if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) { + protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, + skb->len - extoff); + if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { pr_debug("proto header not found\n"); return NF_ACCEPT; } - ret = helper->help(pskb, protoff, ct, ctinfo); + ret = helper->help(skb, protoff, ct, ctinfo); if (ret != NF_ACCEPT) return ret; out: /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(pskb); + return nf_conntrack_confirm(skb); } static unsigned int ipv6_defrag(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -196,17 +197,17 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if ((*pskb)->nfct) + if (skb->nfct) return NF_ACCEPT; - reasm = nf_ct_frag6_gather(*pskb); + reasm = nf_ct_frag6_gather(skb); /* queued */ if (reasm == NULL) return NF_STOLEN; /* error occured or not fragmented */ - if (reasm == *pskb) + if (reasm == skb) return NF_ACCEPT; nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, @@ -216,12 +217,12 @@ static unsigned int ipv6_defrag(unsigned int hooknum, } static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *reasm = (*pskb)->nfct_reasm; + struct sk_buff *reasm = skb->nfct_reasm; /* This packet is fragmented and has reassembled packet. */ if (reasm) { @@ -229,32 +230,32 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); + ret = nf_conntrack_in(PF_INET6, hooknum, reasm); if (ret != NF_ACCEPT) return ret; } nf_conntrack_get(reasm->nfct); - (*pskb)->nfct = reasm->nfct; - (*pskb)->nfctinfo = reasm->nfctinfo; + skb->nfct = reasm->nfct; + skb->nfctinfo = reasm->nfctinfo; return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, pskb); + return nf_conntrack_in(PF_INET6, hooknum, skb); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct ipv6hdr)) { + if (skb->len < sizeof(struct ipv6hdr)) { if (net_ratelimit()) printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); + return ipv6_conntrack_in(hooknum, skb, in, out, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] = { @@ -307,7 +308,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, .procname = "nf_conntrack_frag6_timeout", - .data = &nf_ct_frag6_timeout, + .data = &nf_frags_ctl.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -315,7 +316,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_ct_frag6_low_thresh, + .data = &nf_frags_ctl.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -323,7 +324,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_ct_frag6_high_thresh, + .data = &nf_frags_ctl.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 25442a8c1ba8..726fafd41961 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -31,6 +31,7 @@ #include <net/sock.h> #include <net/snmp.h> +#include <net/inet_frag.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -48,10 +49,6 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; -unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; -unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; - struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; @@ -63,51 +60,24 @@ struct nf_ct_frag6_skb_cb struct nf_ct_frag6_queue { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* expire timer */ - struct sk_buff *fragments; - int len; - int meat; - ktime_t stamp; unsigned int csum; - __u8 last_in; /* has first/last segment arrived? */ -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 __u16 nhoffset; }; -/* Hash table. */ - -#define FRAG6Q_HASHSZ 64 - -static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ]; -static DEFINE_RWLOCK(nf_ct_frag6_lock); -static u32 nf_ct_frag6_hash_rnd; -static LIST_HEAD(nf_ct_frag6_lru_list); -int nf_ct_frag6_nqueues = 0; - -static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) -{ - hlist_del(&fq->list); - list_del(&fq->lru_list); - nf_ct_frag6_nqueues--; -} +struct inet_frags_ctl nf_frags_ctl __read_mostly = { + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, + .timeout = IPV6_FRAG_TIMEOUT, + .secret_interval = 10 * 60 * HZ, +}; -static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) -{ - write_lock(&nf_ct_frag6_lock); - __fq_unlink(fq); - write_unlock(&nf_ct_frag6_lock); -} +static struct inet_frags nf_frags; static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) @@ -120,7 +90,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += nf_ct_frag6_hash_rnd; + c += nf_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -133,100 +103,54 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (FRAG6Q_HASHSZ - 1); + return c & (INETFRAGS_HASHSZ - 1); } -static struct timer_list nf_ct_frag6_secret_timer; -int nf_ct_frag6_secret_interval = 10 * 60 * HZ; - -static void nf_ct_frag6_secret_rebuild(unsigned long dummy) +static unsigned int nf_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; - - write_lock(&nf_ct_frag6_lock); - get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); - for (i = 0; i < FRAG6Q_HASHSZ; i++) { - struct nf_ct_frag6_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) { - unsigned int hval = ip6qhashfn(q->id, - &q->saddr, - &q->daddr); - if (hval != i) { - hlist_del(&q->list); - /* Relink to new hash chain. */ - hlist_add_head(&q->list, - &nf_ct_frag6_hash[hval]); - } - } - } - write_unlock(&nf_ct_frag6_lock); + struct nf_ct_frag6_queue *nq; - mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); + nq = container_of(q, struct nf_ct_frag6_queue, q); + return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); } -atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); +static void nf_skb_free(struct sk_buff *skb) +{ + if (NFCT_FRAG6_CB(skb)->orig) + kfree_skb(NFCT_FRAG6_CB(skb)->orig); +} /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &nf_ct_frag6_mem); - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); - + atomic_sub(skb->truesize, &nf_frags.mem); + nf_skb_free(skb); kfree_skb(skb); } -static inline void frag_free_queue(struct nf_ct_frag6_queue *fq, - unsigned int *work) +static void nf_frag_free(struct inet_frag_queue *q) { - if (work) - *work -= sizeof(struct nf_ct_frag6_queue); - atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); - kfree(fq); + kfree(container_of(q, struct nf_ct_frag6_queue, q)); } static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) { - struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); + struct nf_ct_frag6_queue *fq; - if (!fq) + fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); + if (fq == NULL) return NULL; - atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); + atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem); return fq; } /* Destruction primitives. */ -/* Complete destruction of fq. */ -static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq, - unsigned int *work) +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) { - struct sk_buff *fp; - - BUG_TRAP(fq->last_in&COMPLETE); - BUG_TRAP(del_timer(&fq->timer) == 0); - - /* Release all fragment data. */ - fp = fq->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - frag_free_queue(fq, work); -} - -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) -{ - if (atomic_dec_and_test(&fq->refcnt)) - nf_ct_frag6_destroy(fq, work); + inet_frag_put(&fq->q, &nf_frags); } /* Kill fq entry. It is not destroyed immediately, @@ -234,62 +158,28 @@ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) */ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) { - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); - - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } + inet_frag_kill(&fq->q, &nf_frags); } static void nf_ct_frag6_evictor(void) { - struct nf_ct_frag6_queue *fq; - struct list_head *tmp; - unsigned int work; - - work = atomic_read(&nf_ct_frag6_mem); - if (work <= nf_ct_frag6_low_thresh) - return; - - work -= nf_ct_frag6_low_thresh; - while (work > 0) { - read_lock(&nf_ct_frag6_lock); - if (list_empty(&nf_ct_frag6_lru_list)) { - read_unlock(&nf_ct_frag6_lock); - return; - } - tmp = nf_ct_frag6_lru_list.next; - BUG_ON(tmp == NULL); - fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); - atomic_inc(&fq->refcnt); - read_unlock(&nf_ct_frag6_lock); - - spin_lock(&fq->lock); - if (!(fq->last_in&COMPLETE)) - fq_kill(fq); - spin_unlock(&fq->lock); - - fq_put(fq, &work); - } + inet_frag_evictor(&nf_frags); } static void nf_ct_frag6_expire(unsigned long data) { struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto out; fq_kill(fq); out: - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); } /* Creation primitives. */ @@ -302,31 +192,31 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, struct hlist_node *n; #endif - write_lock(&nf_ct_frag6_lock); + write_lock(&nf_frags.lock); #ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { + hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { if (fq->id == fq_in->id && ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->refcnt); - write_unlock(&nf_ct_frag6_lock); - fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); + atomic_inc(&fq->q.refcnt); + write_unlock(&nf_frags.lock); + fq_in->q.last_in |= COMPLETE; + fq_put(fq_in); return fq; } } #endif fq = fq_in; - if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) - atomic_inc(&fq->refcnt); + if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout)) + atomic_inc(&fq->q.refcnt); - atomic_inc(&fq->refcnt); - hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]); - INIT_LIST_HEAD(&fq->lru_list); - list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); - nf_ct_frag6_nqueues++; - write_unlock(&nf_ct_frag6_lock); + atomic_inc(&fq->q.refcnt); + hlist_add_head(&fq->q.list, &nf_frags.hash[hash]); + INIT_LIST_HEAD(&fq->q.lru_list); + list_add_tail(&fq->q.lru_list, &nf_frags.lru_list); + nf_frags.nqueues++; + write_unlock(&nf_frags.lock); return fq; } @@ -341,15 +231,13 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str goto oom; } - memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); - fq->id = id; ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq); - spin_lock_init(&fq->lock); - atomic_set(&fq->refcnt, 1); + setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq); + spin_lock_init(&fq->q.lock); + atomic_set(&fq->q.refcnt, 1); return nf_ct_frag6_intern(hash, fq); @@ -364,17 +252,17 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) struct hlist_node *n; unsigned int hash = ip6qhashfn(id, src, dst); - read_lock(&nf_ct_frag6_lock); - hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { + read_lock(&nf_frags.lock); + hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { if (fq->id == id && ipv6_addr_equal(src, &fq->saddr) && ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->refcnt); - read_unlock(&nf_ct_frag6_lock); + atomic_inc(&fq->q.refcnt); + read_unlock(&nf_frags.lock); return fq; } } - read_unlock(&nf_ct_frag6_lock); + read_unlock(&nf_frags.lock); return nf_ct_frag6_create(hash, id, src, dst); } @@ -386,7 +274,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; int offset, end; - if (fq->last_in & COMPLETE) { + if (fq->q.last_in & COMPLETE) { pr_debug("Allready completed\n"); goto err; } @@ -412,13 +300,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < fq->len || - ((fq->last_in & LAST_IN) && end != fq->len)) { + if (end < fq->q.len || + ((fq->q.last_in & LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } - fq->last_in |= LAST_IN; - fq->len = end; + fq->q.last_in |= LAST_IN; + fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -430,13 +318,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, pr_debug("end of fragment not rounded to 8 bytes.\n"); return -1; } - if (end > fq->len) { + if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->last_in & LAST_IN) { + if (fq->q.last_in & LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } - fq->len = end; + fq->q.len = end; } } @@ -458,7 +346,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * this fragment, right? */ prev = NULL; - for (next = fq->fragments; next != NULL; next = next->next) { + for (next = fq->q.fragments; next != NULL; next = next->next) { if (NFCT_FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -503,7 +391,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* next fragment */ NFCT_FRAG6_CB(next)->offset += i; - fq->meat -= i; + fq->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -518,9 +406,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->fragments = next; + fq->q.fragments = next; - fq->meat -= free_it->len; + fq->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -532,23 +420,23 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->fragments = skb; + fq->q.fragments = skb; skb->dev = NULL; - fq->stamp = skb->tstamp; - fq->meat += skb->len; - atomic_add(skb->truesize, &nf_ct_frag6_mem); + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + atomic_add(skb->truesize, &nf_frags.mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->last_in |= FIRST_IN; + fq->q.last_in |= FIRST_IN; } - write_lock(&nf_ct_frag6_lock); - list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); - write_unlock(&nf_ct_frag6_lock); + write_lock(&nf_frags.lock); + list_move_tail(&fq->q.lru_list, &nf_frags.lru_list); + write_unlock(&nf_frags.lock); return 0; err: @@ -567,7 +455,7 @@ err: static struct sk_buff * nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->fragments; + struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; fq_kill(fq); @@ -577,7 +465,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->len - + sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { pr_debug("payload len is too large.\n"); @@ -614,7 +502,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_ct_frag6_mem); + atomic_add(clone->truesize, &nf_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -628,7 +516,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_ct_frag6_mem); + atomic_sub(head->truesize, &nf_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -638,12 +526,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_ct_frag6_mem); + atomic_sub(fp->truesize, &nf_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = fq->stamp; + head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ @@ -652,7 +540,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_network_header_len(head), head->csum); - fq->fragments = NULL; + fq->q.fragments = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; @@ -788,7 +676,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) + if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh) nf_ct_frag6_evictor(); fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); @@ -797,23 +685,23 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->lock); + spin_unlock(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); - fq_put(fq, NULL); + fq_put(fq); goto ret_orig; } - if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { + if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->lock); + spin_unlock(&fq->q.lock); - fq_put(fq, NULL); + fq_put(fq); return ret_skb; ret_orig: @@ -859,20 +747,20 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb) int nf_ct_frag6_init(void) { - nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0); - nf_ct_frag6_secret_timer.expires = jiffies - + nf_ct_frag6_secret_interval; - add_timer(&nf_ct_frag6_secret_timer); + nf_frags.ctl = &nf_frags_ctl; + nf_frags.hashfn = nf_hashfn; + nf_frags.destructor = nf_frag_free; + nf_frags.skb_free = nf_skb_free; + nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + inet_frags_init(&nf_frags); return 0; } void nf_ct_frag6_cleanup(void) { - del_timer(&nf_ct_frag6_secret_timer); - nf_ct_frag6_low_thresh = 0; + inet_frags_fini(&nf_frags); + + nf_frags_ctl.low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index db945018579e..be526ad92543 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -54,7 +54,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues, atomic_read(&ip6_frag_mem)); + ip6_frag_nqueues(), ip6_frag_mem()); return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 31601c993541..6ad19cfc2025 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -42,6 +42,7 @@ #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/skbuff.h> #include <net/sock.h> #include <net/snmp.h> @@ -53,11 +54,7 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> - -int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; -int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; - -int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; +#include <net/inet_frag.h> struct ip6frag_skb_cb { @@ -74,53 +71,39 @@ struct ip6frag_skb_cb struct frag_queue { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* expire timer */ - struct sk_buff *fragments; - int len; - int meat; int iif; - ktime_t stamp; unsigned int csum; - __u8 last_in; /* has first/last segment arrived? */ -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 __u16 nhoffset; }; -/* Hash table. */ - -#define IP6Q_HASHSZ 64 +struct inet_frags_ctl ip6_frags_ctl __read_mostly = { + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, + .timeout = IPV6_FRAG_TIMEOUT, + .secret_interval = 10 * 60 * HZ, +}; -static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ]; -static DEFINE_RWLOCK(ip6_frag_lock); -static u32 ip6_frag_hash_rnd; -static LIST_HEAD(ip6_frag_lru_list); -int ip6_frag_nqueues = 0; +static struct inet_frags ip6_frags; -static __inline__ void __fq_unlink(struct frag_queue *fq) +int ip6_frag_nqueues(void) { - hlist_del(&fq->list); - list_del(&fq->lru_list); - ip6_frag_nqueues--; + return ip6_frags.nqueues; } -static __inline__ void fq_unlink(struct frag_queue *fq) +int ip6_frag_mem(void) { - write_lock(&ip6_frag_lock); - __fq_unlink(fq); - write_unlock(&ip6_frag_lock); + return atomic_read(&ip6_frags.mem); } +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + struct net_device *dev); + /* * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. @@ -136,7 +119,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frag_hash_rnd; + c += ip6_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -149,60 +132,29 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (IP6Q_HASHSZ - 1); + return c & (INETFRAGS_HASHSZ - 1); } -static struct timer_list ip6_frag_secret_timer; -int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ip6_frag_secret_rebuild(unsigned long dummy) +static unsigned int ip6_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; - - write_lock(&ip6_frag_lock); - get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32)); - for (i = 0; i < IP6Q_HASHSZ; i++) { - struct frag_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) { - unsigned int hval = ip6qhashfn(q->id, - &q->saddr, - &q->daddr); - - if (hval != i) { - hlist_del(&q->list); - - /* Relink to new hash chain. */ - hlist_add_head(&q->list, - &ip6_frag_hash[hval]); - - } - } - } - write_unlock(&ip6_frag_lock); + struct frag_queue *fq; - mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval); + fq = container_of(q, struct frag_queue, q); + return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); } -atomic_t ip6_frag_mem = ATOMIC_INIT(0); - /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip6_frag_mem); + atomic_sub(skb->truesize, &ip6_frags.mem); kfree_skb(skb); } -static inline void frag_free_queue(struct frag_queue *fq, int *work) +static void ip6_frag_free(struct inet_frag_queue *fq) { - if (work) - *work -= sizeof(struct frag_queue); - atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); - kfree(fq); + kfree(container_of(fq, struct frag_queue, q)); } static inline struct frag_queue *frag_alloc_queue(void) @@ -211,36 +163,15 @@ static inline struct frag_queue *frag_alloc_queue(void) if(!fq) return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frag_mem); + atomic_add(sizeof(struct frag_queue), &ip6_frags.mem); return fq; } /* Destruction primitives. */ -/* Complete destruction of fq. */ -static void ip6_frag_destroy(struct frag_queue *fq, int *work) -{ - struct sk_buff *fp; - - BUG_TRAP(fq->last_in&COMPLETE); - BUG_TRAP(del_timer(&fq->timer) == 0); - - /* Release all fragment data. */ - fp = fq->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - frag_free_queue(fq, work); -} - -static __inline__ void fq_put(struct frag_queue *fq, int *work) +static __inline__ void fq_put(struct frag_queue *fq) { - if (atomic_dec_and_test(&fq->refcnt)) - ip6_frag_destroy(fq, work); + inet_frag_put(&fq->q, &ip6_frags); } /* Kill fq entry. It is not destroyed immediately, @@ -248,45 +179,16 @@ static __inline__ void fq_put(struct frag_queue *fq, int *work) */ static __inline__ void fq_kill(struct frag_queue *fq) { - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); - - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } + inet_frag_kill(&fq->q, &ip6_frags); } static void ip6_evictor(struct inet6_dev *idev) { - struct frag_queue *fq; - struct list_head *tmp; - int work; - - work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh; - if (work <= 0) - return; - - while(work > 0) { - read_lock(&ip6_frag_lock); - if (list_empty(&ip6_frag_lru_list)) { - read_unlock(&ip6_frag_lock); - return; - } - tmp = ip6_frag_lru_list.next; - fq = list_entry(tmp, struct frag_queue, lru_list); - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); - - spin_lock(&fq->lock); - if (!(fq->last_in&COMPLETE)) - fq_kill(fq); - spin_unlock(&fq->lock); - - fq_put(fq, &work); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); - } + int evicted; + + evicted = inet_frag_evictor(&ip6_frags); + if (evicted) + IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); } static void ip6_frag_expire(unsigned long data) @@ -294,9 +196,9 @@ static void ip6_frag_expire(unsigned long data) struct frag_queue *fq = (struct frag_queue *) data; struct net_device *dev = NULL; - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto out; fq_kill(fq); @@ -311,7 +213,7 @@ static void ip6_frag_expire(unsigned long data) rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ - if (!(fq->last_in&FIRST_IN) || !fq->fragments) + if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments) goto out; /* @@ -319,13 +221,13 @@ static void ip6_frag_expire(unsigned long data) segment was received. And do not use fq->dev pointer directly, device might already disappeared. */ - fq->fragments->dev = dev; - icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + fq->q.fragments->dev = dev; + icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); out: if (dev) dev_put(dev); - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); } /* Creation primitives. */ @@ -339,32 +241,32 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) struct hlist_node *n; #endif - write_lock(&ip6_frag_lock); + write_lock(&ip6_frags.lock); hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr); #ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { + hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { if (fq->id == fq_in->id && ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->refcnt); - write_unlock(&ip6_frag_lock); - fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); + atomic_inc(&fq->q.refcnt); + write_unlock(&ip6_frags.lock); + fq_in->q.last_in |= COMPLETE; + fq_put(fq_in); return fq; } } #endif fq = fq_in; - if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) - atomic_inc(&fq->refcnt); + if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout)) + atomic_inc(&fq->q.refcnt); - atomic_inc(&fq->refcnt); - hlist_add_head(&fq->list, &ip6_frag_hash[hash]); - INIT_LIST_HEAD(&fq->lru_list); - list_add_tail(&fq->lru_list, &ip6_frag_lru_list); - ip6_frag_nqueues++; - write_unlock(&ip6_frag_lock); + atomic_inc(&fq->q.refcnt); + hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]); + INIT_LIST_HEAD(&fq->q.lru_list); + list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list); + ip6_frags.nqueues++; + write_unlock(&ip6_frags.lock); return fq; } @@ -382,11 +284,11 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - init_timer(&fq->timer); - fq->timer.function = ip6_frag_expire; - fq->timer.data = (long) fq; - spin_lock_init(&fq->lock); - atomic_set(&fq->refcnt, 1); + init_timer(&fq->q.timer); + fq->q.timer.function = ip6_frag_expire; + fq->q.timer.data = (long) fq; + spin_lock_init(&fq->q.lock); + atomic_set(&fq->q.refcnt, 1); return ip6_frag_intern(fq); @@ -403,30 +305,31 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct hlist_node *n; unsigned int hash; - read_lock(&ip6_frag_lock); + read_lock(&ip6_frags.lock); hash = ip6qhashfn(id, src, dst); - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { + hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { if (fq->id == id && ipv6_addr_equal(src, &fq->saddr) && ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); + atomic_inc(&fq->q.refcnt); + read_unlock(&ip6_frags.lock); return fq; } } - read_unlock(&ip6_frag_lock); + read_unlock(&ip6_frags.lock); return ip6_frag_create(id, src, dst, idev); } -static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, +static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; + struct net_device *dev; int offset, end; - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -439,7 +342,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb))); - return; + return -1; } if (skb->ip_summed == CHECKSUM_COMPLETE) { @@ -454,11 +357,11 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < fq->len || - ((fq->last_in & LAST_IN) && end != fq->len)) + if (end < fq->q.len || + ((fq->q.last_in & LAST_IN) && end != fq->q.len)) goto err; - fq->last_in |= LAST_IN; - fq->len = end; + fq->q.last_in |= LAST_IN; + fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -471,13 +374,13 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); - return; + return -1; } - if (end > fq->len) { + if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->last_in & LAST_IN) + if (fq->q.last_in & LAST_IN) goto err; - fq->len = end; + fq->q.len = end; } } @@ -496,7 +399,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * this fragment, right? */ prev = NULL; - for(next = fq->fragments; next != NULL; next = next->next) { + for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -533,7 +436,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (!pskb_pull(next, i)) goto err; FRAG6_CB(next)->offset += i; /* next fragment */ - fq->meat -= i; + fq->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -548,9 +451,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->fragments = next; + fq->q.fragments = next; - fq->meat -= free_it->len; + fq->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -562,30 +465,37 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->fragments = skb; + fq->q.fragments = skb; - if (skb->dev) - fq->iif = skb->dev->ifindex; - skb->dev = NULL; - fq->stamp = skb->tstamp; - fq->meat += skb->len; - atomic_add(skb->truesize, &ip6_frag_mem); + dev = skb->dev; + if (dev) { + fq->iif = dev->ifindex; + skb->dev = NULL; + } + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + atomic_add(skb->truesize, &ip6_frags.mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->last_in |= FIRST_IN; + fq->q.last_in |= FIRST_IN; } - write_lock(&ip6_frag_lock); - list_move_tail(&fq->lru_list, &ip6_frag_lru_list); - write_unlock(&ip6_frag_lock); - return; + + if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len) + return ip6_frag_reasm(fq, prev, dev); + + write_lock(&ip6_frags.lock); + list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list); + write_unlock(&ip6_frags.lock); + return -1; err: IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); + return -1; } /* @@ -597,21 +507,39 @@ err: * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *head = fq->fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; fq_kill(fq); + /* Make the one we just received the head. */ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + + if (!fp) + goto out_oom; + + fp->next = head->next; + prev->next = fp; + + skb_morph(head, fq->q.fragments); + head->next = fq->q.fragments->next; + + kfree_skb(fq->q.fragments); + fq->q.fragments = head; + } + BUG_TRAP(head != NULL); BUG_TRAP(FRAG6_CB(head)->offset == 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->len - + sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; @@ -640,7 +568,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip6_frag_mem); + atomic_add(clone->truesize, &ip6_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -655,7 +583,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip6_frag_mem); + atomic_sub(head->truesize, &ip6_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -665,17 +593,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip6_frag_mem); + atomic_sub(fp->truesize, &ip6_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = fq->stamp; + head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); IP6CB(head)->nhoff = nhoff; - *skb_in = head; - /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(skb_network_header(head), @@ -685,7 +611,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, rcu_read_lock(); IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); - fq->fragments = NULL; + fq->q.fragments = NULL; return 1; out_oversize: @@ -702,10 +628,8 @@ out_fail: return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp) +static int ipv6_frag_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; - struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -739,23 +663,19 @@ static int ipv6_frag_rcv(struct sk_buff **skbp) return 1; } - if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) + if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) ip6_evictor(ip6_dst_idev(skb->dst)); if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { - int ret = -1; + int ret; - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); - if (fq->last_in == (FIRST_IN|LAST_IN) && - fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, dev); - - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); return ret; } @@ -775,11 +695,10 @@ void __init ipv6_frag_init(void) if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); - ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - init_timer(&ip6_frag_secret_timer); - ip6_frag_secret_timer.function = ip6_frag_secret_rebuild; - ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval; - add_timer(&ip6_frag_secret_timer); + ip6_frags.ctl = &ip6_frags_ctl; + ip6_frags.hashfn = ip6_hashfn; + ip6_frags.destructor = ip6_frag_free; + ip6_frags.skb_free = NULL; + ip6_frags.qsize = sizeof(struct frag_queue); + inet_frags_init(&ip6_frags); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6ff19f9eb9ee..cce9941c11c6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -663,7 +663,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, +static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -682,7 +682,7 @@ restart_2: fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(fn, fl->iif, strict | reachable); + rt = rt6_select(fn, oif, strict | reachable); BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -735,6 +735,12 @@ out2: return rt; } +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, + struct flowi *fl, int flags) +{ + return ip6_pol_route(table, fl->iif, fl, flags); +} + void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); @@ -761,72 +767,7 @@ void ip6_route_input(struct sk_buff *skb) static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, struct flowi *fl, int flags) { - struct fib6_node *fn; - struct rt6_info *rt, *nrt; - int strict = 0; - int attempts = 3; - int err; - int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; - - strict |= flags & RT6_LOOKUP_F_IFACE; - -relookup: - read_lock_bh(&table->tb6_lock); - -restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); - -restart: - rt = rt6_select(fn, fl->oif, strict | reachable); - BACKTRACK(&fl->fl6_src); - if (rt == &ip6_null_entry || - rt->rt6i_flags & RTF_CACHE) - goto out; - - dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); - - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else { -#if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &fl->fl6_dst); -#else - goto out2; -#endif - } - - dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; - - dst_hold(&rt->u.dst); - if (nrt) { - err = ip6_ins_rt(nrt); - if (!err) - goto out2; - } - - if (--attempts <= 0) - goto out2; - - /* - * Race condition! In the gap, when table->tb6_lock was - * released someone could insert this route. Relookup. - */ - dst_release(&rt->u.dst); - goto relookup; - -out: - if (reachable) { - reachable = 0; - goto restart_2; - } - dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); -out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; - return rt; + return ip6_pol_route(table, fl->oif, fl, flags); } struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3fb44277207b..68bb2548e469 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -12,6 +12,7 @@ #include <net/ndisc.h> #include <net/ipv6.h> #include <net/addrconf.h> +#include <net/inet_frag.h> #ifdef CONFIG_SYSCTL @@ -41,7 +42,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", - .data = &sysctl_ip6frag_high_thresh, + .data = &ip6_frags_ctl.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -49,7 +50,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", - .data = &sysctl_ip6frag_low_thresh, + .data = &ip6_frags_ctl.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -57,7 +58,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", - .data = &sysctl_ip6frag_time, + .data = &ip6_frags_ctl.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -66,7 +67,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", - .data = &sysctl_ip6frag_secret_interval, + .data = &ip6_frags_ctl.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a07b59c528f3..737b755342bd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1668,9 +1668,8 @@ ipv6_pktoptions: return 0; } -static int tcp_v6_rcv(struct sk_buff **pskb) +static int tcp_v6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct tcphdr *th; struct sock *sk; int ret; diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 23e2809878ae..6323921b40be 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -87,9 +87,8 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int tunnel6_rcv(struct sk_buff **pskb) +static int tunnel6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) @@ -106,9 +105,8 @@ drop: return 0; } -static int tunnel46_rcv(struct sk_buff **pskb) +static int tunnel46_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 82ff26dd4470..caebad6ee510 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -405,10 +405,9 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], int proto) { - struct sk_buff *skb = *pskb; struct sock *sk; struct udphdr *uh; struct net_device *dev = skb->dev; @@ -494,9 +493,9 @@ discard: return 0; } -static __inline__ int udpv6_rcv(struct sk_buff **pskb) +static __inline__ int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); } /* diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 6e252f318f7c..2d3fda601232 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -6,7 +6,7 @@ #include <net/addrconf.h> #include <net/inet_common.h> -extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, int , int , int , __be32 , struct hlist_head []); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f54016a55004..766566f7de47 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -17,9 +17,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; -static int udplitev6_rcv(struct sk_buff **pskb) +static int udplitev6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index c858537cec4b..02f69e544f6f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -133,9 +133,9 @@ drop: EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff **pskb) +int xfrm6_rcv(struct sk_buff *skb) { - return xfrm6_rcv_spi(*pskb, 0); + return xfrm6_rcv_spi(skb, 0); } EXPORT_SYMBOL(xfrm6_rcv); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4618c18e611d..a5a32c17249d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -80,7 +80,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) while (likely((err = xfrm6_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -88,7 +88,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm6_output_finish2); if (unlikely(err != 1)) break; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a523fa4136ed..bed9ba01e8ec 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -117,7 +117,7 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, + struct sk_buff *skb, int hook, const struct net_device *indev, const struct net_device *outdev, @@ -160,7 +160,7 @@ unsigned int nf_iterate(struct list_head *head, /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -175,17 +175,17 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, elem = &nf_hooks[pf][hook]; next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, + verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; goto unlock; } else if (verdict == NF_DROP) { - kfree_skb(*pskb); + kfree_skb(skb); ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, + if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } @@ -196,34 +196,24 @@ unlock: EXPORT_SYMBOL(nf_hook_slow); -int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) +int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) { - struct sk_buff *nskb; - - if (writable_len > (*pskb)->len) + if (writable_len > skb->len) return 0; /* Not exclusive use of packet? Must copy. */ - if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) - goto copy_skb; - if (skb_shared(*pskb)) - goto copy_skb; - - return pskb_may_pull(*pskb, writable_len); - -copy_skb: - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return 0; - BUG_ON(skb_is_nonlinear(nskb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - return 1; + if (!skb_cloned(skb)) { + if (writable_len <= skb_headlen(skb)) + return 1; + } else if (skb_clone_writable(skb, writable_len)) + return 1; + + if (writable_len <= skb_headlen(skb)) + writable_len = 0; + else + writable_len -= skb_headlen(skb); + + return !!__pskb_pull_tail(skb, writable_len); } EXPORT_SYMBOL(skb_make_writable); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index e42ab230ad88..7b8239c0cd5e 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -36,7 +36,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); module_param(ts_algo, charp, 0400); MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); -unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -79,7 +79,7 @@ static struct { }, }; -static int amanda_help(struct sk_buff **pskb, +static int amanda_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -101,25 +101,25 @@ static int amanda_help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - nf_ct_refresh(ct, *pskb, master_timeout * HZ); + nf_ct_refresh(ct, skb, master_timeout * HZ); /* No data? */ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= (*pskb)->len) { + if (dataoff >= skb->len) { if (net_ratelimit()) - printk("amanda_help: skblen = %u\n", (*pskb)->len); + printk("amanda_help: skblen = %u\n", skb->len); return NF_ACCEPT; } memset(&ts, 0, sizeof(ts)); - start = skb_find_text(*pskb, dataoff, (*pskb)->len, + start = skb_find_text(skb, dataoff, skb->len, search[SEARCH_CONNECT].ts, &ts); if (start == UINT_MAX) goto out; start += dataoff + search[SEARCH_CONNECT].len; memset(&ts, 0, sizeof(ts)); - stop = skb_find_text(*pskb, start, (*pskb)->len, + stop = skb_find_text(skb, start, skb->len, search[SEARCH_NEWLINE].ts, &ts); if (stop == UINT_MAX) goto out; @@ -127,13 +127,13 @@ static int amanda_help(struct sk_buff **pskb, for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { memset(&ts, 0, sizeof(ts)); - off = skb_find_text(*pskb, start, stop, search[i].ts, &ts); + off = skb_find_text(skb, start, stop, search[i].ts, &ts); if (off == UINT_MAX) continue; off += start + search[i].len; len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off); - if (skb_copy_bits(*pskb, off, pbuf, len)) + if (skb_copy_bits(skb, off, pbuf, len)) break; pbuf[len] = '\0'; @@ -153,7 +153,7 @@ static int amanda_help(struct sk_buff **pskb, nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && ct->status & IPS_NAT_MASK) - ret = nf_nat_amanda(pskb, ctinfo, off - dataoff, + ret = nf_nat_amanda(skb, ctinfo, off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 83c30b45d170..4d6171bc0829 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -307,7 +307,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); /* Confirm a connection given skb; places it in hash table */ int -__nf_conntrack_confirm(struct sk_buff **pskb) +__nf_conntrack_confirm(struct sk_buff *skb) { unsigned int hash, repl_hash; struct nf_conntrack_tuple_hash *h; @@ -316,7 +316,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) struct hlist_node *n; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -367,14 +367,14 @@ __nf_conntrack_confirm(struct sk_buff **pskb) write_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, *pskb); + nf_conntrack_event_cache(IPCT_HELPER, skb); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, *pskb); + nf_conntrack_event_cache(IPCT_NATINFO, skb); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + IPCT_RELATED : IPCT_NEW, skb); return NF_ACCEPT; out: @@ -632,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) +nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -644,14 +644,14 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) int ret; /* Previously seen (loopback or untracked)? Ignore. */ - if ((*pskb)->nfct) { + if (skb->nfct) { NF_CT_STAT_INC_ATOMIC(ignore); return NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ l3proto = __nf_ct_l3proto_find((u_int16_t)pf); - ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb), + ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); @@ -666,13 +666,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL && - (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { + (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { NF_CT_STAT_INC_ATOMIC(error); NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } - ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto, + ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ @@ -686,21 +686,21 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) return NF_DROP; } - NF_CT_ASSERT((*pskb)->nfct); + NF_CT_ASSERT(skb->nfct); - ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); if (ret < 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put((*pskb)->nfct); - (*pskb)->nfct = NULL; + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, *pskb); + nf_conntrack_event_cache(IPCT_STATUS, skb); return ret; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index c763ee74ea02..6df259067f7e 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -43,7 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400); static int loose; module_param(loose, bool, 0600); -unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -344,7 +344,7 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, } } -static int help(struct sk_buff **pskb, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -371,21 +371,21 @@ static int help(struct sk_buff **pskb, return NF_ACCEPT; } - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; dataoff = protoff + th->doff * 4; /* No data? */ - if (dataoff >= (*pskb)->len) { + if (dataoff >= skb->len) { pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, - (*pskb)->len); + skb->len); return NF_ACCEPT; } - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; spin_lock_bh(&nf_ftp_lock); - fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); + fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer); BUG_ON(fb_ptr == NULL); ends_in_nl = (fb_ptr[datalen - 1] == '\n'); @@ -491,7 +491,7 @@ static int help(struct sk_buff **pskb, * (possibly changed) expectation itself. */ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); if (nf_nat_ftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, + ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ @@ -508,7 +508,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, *pskb); + update_nl_seq(seq, ct_ftp_info, dir, skb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a8a9dfbe7a67..f23fd9598e19 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -47,27 +47,27 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "(determined by routing information)"); /* Hooks for NAT */ -int (*set_h245_addr_hook) (struct sk_buff **pskb, +int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_h225_addr_hook) (struct sk_buff **pskb, +int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_sig_addr_hook) (struct sk_buff **pskb, +int (*set_sig_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*set_ras_addr_hook) (struct sk_buff **pskb, +int (*set_ras_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, +int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -75,25 +75,25 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp) __read_mostly; -int (*nat_t120_hook) (struct sk_buff **pskb, +int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_h245_hook) (struct sk_buff **pskb, +int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_callforwarding_hook) (struct sk_buff **pskb, +int (*nat_callforwarding_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_q931_hook) (struct sk_buff **pskb, +int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, @@ -108,7 +108,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[]; static struct nf_conntrack_helper nf_conntrack_helper_ras[]; /****************************************************************************/ -static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, +static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) { @@ -122,7 +122,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, int tpktoff; /* Get TCP header */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return 0; @@ -130,13 +130,13 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, tcpdataoff = protoff + th->doff * 4; /* Get TCP data length */ - tcpdatalen = (*pskb)->len - tcpdataoff; + tcpdatalen = skb->len - tcpdataoff; if (tcpdatalen <= 0) /* No TCP data */ goto clear_out; if (*data == NULL) { /* first TPKT */ /* Get first TPKT pointer */ - tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen, + tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -248,7 +248,7 @@ static int get_h245_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr) @@ -297,7 +297,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp) == 0) { @@ -321,7 +321,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int expect_t120(struct sk_buff **pskb, +static int expect_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -355,7 +355,7 @@ static int expect_t120(struct sk_buff **pskb, (nat_t120 = rcu_dereference(nat_t120_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr, + ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -371,7 +371,7 @@ static int expect_t120(struct sk_buff **pskb, } /****************************************************************************/ -static int process_h245_channel(struct sk_buff **pskb, +static int process_h245_channel(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -381,7 +381,7 @@ static int process_h245_channel(struct sk_buff **pskb, if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &channel->mediaChannel); if (ret < 0) return -1; @@ -390,7 +390,7 @@ static int process_h245_channel(struct sk_buff **pskb, if (channel-> options & eH2250LogicalChannelParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &channel->mediaControlChannel); if (ret < 0) return -1; @@ -400,7 +400,7 @@ static int process_h245_channel(struct sk_buff **pskb, } /****************************************************************************/ -static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, +static int process_olc(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannel *olc) @@ -412,7 +412,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) { - ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, &olc-> forwardLogicalChannelParameters. multiplexParameters. @@ -430,7 +430,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { ret = - process_h245_channel(pskb, ct, ctinfo, data, dataoff, + process_h245_channel(skb, ct, ctinfo, data, dataoff, &olc-> reverseLogicalChannelParameters. multiplexParameters. @@ -448,7 +448,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, t120.choice == eDataProtocolCapability_separateLANStack && olc->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, data, dataoff, &olc->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -459,7 +459,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, +static int process_olca(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannelAck *olca) @@ -477,7 +477,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, choice == eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { - ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, &olca-> reverseLogicalChannelParameters. multiplexParameters. @@ -496,7 +496,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &ack->mediaChannel); if (ret < 0) return -1; @@ -505,7 +505,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &ack->mediaControlChannel); if (ret < 0) return -1; @@ -515,7 +515,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if ((olca->options & eOpenLogicalChannelAck_separateStack) && olca->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, data, dataoff, &olca->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -526,7 +526,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int process_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, MultimediaSystemControlMessage *mscm) @@ -535,7 +535,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, case eMultimediaSystemControlMessage_request: if (mscm->request.choice == eRequestMessage_openLogicalChannel) { - return process_olc(pskb, ct, ctinfo, data, dataoff, + return process_olc(skb, ct, ctinfo, data, dataoff, &mscm->request.openLogicalChannel); } pr_debug("nf_ct_h323: H.245 Request %d\n", @@ -544,7 +544,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, case eMultimediaSystemControlMessage_response: if (mscm->response.choice == eResponseMessage_openLogicalChannelAck) { - return process_olca(pskb, ct, ctinfo, data, dataoff, + return process_olca(skb, ct, ctinfo, data, dataoff, &mscm->response. openLogicalChannelAck); } @@ -560,7 +560,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int h245_help(struct sk_buff **pskb, unsigned int protoff, +static int h245_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static MultimediaSystemControlMessage mscm; @@ -574,12 +574,12 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(pskb, protoff, ct, ctinfo, + while (get_tpkt_data(skb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { pr_debug("nf_ct_h245: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -596,7 +596,7 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, } /* Process H.245 signal */ - if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0) + if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0) goto drop; } @@ -654,7 +654,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr) @@ -687,7 +687,7 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, (nat_h245 = rcu_dereference(nat_h245_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr, + ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -758,7 +758,7 @@ static int callforward_do_filter(union nf_conntrack_address *src, } /****************************************************************************/ -static int expect_callforwarding(struct sk_buff **pskb, +static int expect_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -798,7 +798,7 @@ static int expect_callforwarding(struct sk_buff **pskb, (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, + ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -814,7 +814,7 @@ static int expect_callforwarding(struct sk_buff **pskb, } /****************************************************************************/ -static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, +static int process_setup(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Setup_UUIE *setup) @@ -829,7 +829,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &setup->h245Address); if (ret < 0) return -1; @@ -846,7 +846,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr(pskb, data, dataoff, + ret = set_h225_addr(skb, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.u.tcp.port); @@ -864,7 +864,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr(pskb, data, dataoff, + ret = set_h225_addr(skb, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple.dst.u.tcp.port); @@ -874,7 +874,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, if (setup->options & eSetup_UUIE_fastStart) { for (i = 0; i < setup->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &setup->fastStart.item[i]); if (ret < 0) return -1; @@ -885,7 +885,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_callproceeding(struct sk_buff **pskb, +static int process_callproceeding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -897,7 +897,7 @@ static int process_callproceeding(struct sk_buff **pskb, pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &callproc->h245Address); if (ret < 0) return -1; @@ -905,7 +905,7 @@ static int process_callproceeding(struct sk_buff **pskb, if (callproc->options & eCallProceeding_UUIE_fastStart) { for (i = 0; i < callproc->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &callproc->fastStart.item[i]); if (ret < 0) return -1; @@ -916,7 +916,7 @@ static int process_callproceeding(struct sk_buff **pskb, } /****************************************************************************/ -static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, +static int process_connect(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Connect_UUIE *connect) @@ -927,7 +927,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &connect->h245Address); if (ret < 0) return -1; @@ -935,7 +935,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, if (connect->options & eConnect_UUIE_fastStart) { for (i = 0; i < connect->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &connect->fastStart.item[i]); if (ret < 0) return -1; @@ -946,7 +946,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, +static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Alerting_UUIE *alert) @@ -957,7 +957,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &alert->h245Address); if (ret < 0) return -1; @@ -965,7 +965,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, if (alert->options & eAlerting_UUIE_fastStart) { for (i = 0; i < alert->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &alert->fastStart.item[i]); if (ret < 0) return -1; @@ -976,7 +976,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, +static int process_facility(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Facility_UUIE *facility) @@ -988,7 +988,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) - return expect_callforwarding(pskb, ct, ctinfo, data, + return expect_callforwarding(skb, ct, ctinfo, data, dataoff, &facility-> alternativeAddress); @@ -996,7 +996,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, } if (facility->options & eFacility_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &facility->h245Address); if (ret < 0) return -1; @@ -1004,7 +1004,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, if (facility->options & eFacility_UUIE_fastStart) { for (i = 0; i < facility->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &facility->fastStart.item[i]); if (ret < 0) return -1; @@ -1015,7 +1015,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, +static int process_progress(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Progress_UUIE *progress) @@ -1026,7 +1026,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &progress->h245Address); if (ret < 0) return -1; @@ -1034,7 +1034,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, if (progress->options & eProgress_UUIE_fastStart) { for (i = 0; i < progress->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &progress->fastStart.item[i]); if (ret < 0) return -1; @@ -1045,7 +1045,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int process_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Q931 *q931) { @@ -1055,28 +1055,28 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, switch (pdu->h323_message_body.choice) { case eH323_UU_PDU_h323_message_body_setup: - ret = process_setup(pskb, ct, ctinfo, data, dataoff, + ret = process_setup(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.setup); break; case eH323_UU_PDU_h323_message_body_callProceeding: - ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff, + ret = process_callproceeding(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body. callProceeding); break; case eH323_UU_PDU_h323_message_body_connect: - ret = process_connect(pskb, ct, ctinfo, data, dataoff, + ret = process_connect(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.connect); break; case eH323_UU_PDU_h323_message_body_alerting: - ret = process_alerting(pskb, ct, ctinfo, data, dataoff, + ret = process_alerting(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.alerting); break; case eH323_UU_PDU_h323_message_body_facility: - ret = process_facility(pskb, ct, ctinfo, data, dataoff, + ret = process_facility(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.facility); break; case eH323_UU_PDU_h323_message_body_progress: - ret = process_progress(pskb, ct, ctinfo, data, dataoff, + ret = process_progress(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.progress); break; default: @@ -1090,7 +1090,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, if (pdu->options & eH323_UU_PDU_h245Control) { for (i = 0; i < pdu->h245Control.count; i++) { - ret = process_h245(pskb, ct, ctinfo, data, dataoff, + ret = process_h245(skb, ct, ctinfo, data, dataoff, &pdu->h245Control.item[i]); if (ret < 0) return -1; @@ -1101,7 +1101,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int q931_help(struct sk_buff **pskb, unsigned int protoff, +static int q931_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static Q931 q931; @@ -1115,12 +1115,12 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(pskb, protoff, ct, ctinfo, + while (get_tpkt_data(skb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { pr_debug("nf_ct_q931: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -1136,7 +1136,7 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, } /* Process Q.931 signal */ - if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0) + if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0) goto drop; } @@ -1177,20 +1177,20 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { }; /****************************************************************************/ -static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff, +static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, int *datalen) { struct udphdr _uh, *uh; int dataoff; - uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh); + uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh); if (uh == NULL) return NULL; dataoff = protoff + sizeof(_uh); - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NULL; - *datalen = (*pskb)->len - dataoff; - return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer); + *datalen = skb->len - dataoff; + return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } /****************************************************************************/ @@ -1227,7 +1227,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp, } /****************************************************************************/ -static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -1265,7 +1265,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, nat_q931 = rcu_dereference(nat_q931_hook); if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp); + ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1283,7 +1283,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_grq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperRequest *grq) { @@ -1293,13 +1293,13 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ - return set_ras_addr(pskb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, data, &grq->rasAddress, 1); return 0; } /****************************************************************************/ -static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperConfirm *gcf) { @@ -1343,7 +1343,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationRequest *rrq) { @@ -1353,7 +1353,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_ras: RRQ\n"); - ret = expect_q931(pskb, ct, ctinfo, data, + ret = expect_q931(skb, ct, ctinfo, data, rrq->callSignalAddress.item, rrq->callSignalAddress.count); if (ret < 0) @@ -1361,7 +1361,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(pskb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, data, rrq->rasAddress.item, rrq->rasAddress.count); if (ret < 0) @@ -1378,7 +1378,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationConfirm *rcf) { @@ -1392,7 +1392,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, rcf->callSignalAddress.item, rcf->callSignalAddress.count); if (ret < 0) @@ -1407,7 +1407,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, if (info->timeout > 0) { pr_debug("nf_ct_ras: set RAS connection timeout to " "%u seconds\n", info->timeout); - nf_ct_refresh(ct, *pskb, info->timeout * HZ); + nf_ct_refresh(ct, skb, info->timeout * HZ); /* Set expect timeout */ read_lock_bh(&nf_conntrack_lock); @@ -1427,7 +1427,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, UnregistrationRequest *urq) { @@ -1440,7 +1440,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, urq->callSignalAddress.item, urq->callSignalAddress.count); if (ret < 0) @@ -1453,13 +1453,13 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, info->sig_port[!dir] = 0; /* Give it 30 seconds for UCF or URJ */ - nf_ct_refresh(ct, *pskb, 30 * HZ); + nf_ct_refresh(ct, skb, 30 * HZ); return 0; } /****************************************************************************/ -static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionRequest *arq) { @@ -1479,7 +1479,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, port == info->sig_port[dir] && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Answering ARQ */ - return set_h225_addr(pskb, data, 0, + return set_h225_addr(skb, data, 0, &arq->destCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); @@ -1491,7 +1491,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Calling ARQ */ - return set_h225_addr(pskb, data, 0, + return set_h225_addr(skb, data, 0, &arq->srcCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, port); @@ -1501,7 +1501,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_acf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionConfirm *acf) { @@ -1522,7 +1522,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, /* Answering ACF */ set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) - return set_sig_addr(pskb, ct, ctinfo, data, + return set_sig_addr(skb, ct, ctinfo, data, &acf->destCallSignalAddress, 1); return 0; } @@ -1548,7 +1548,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationRequest *lrq) { @@ -1558,13 +1558,13 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) - return set_ras_addr(pskb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, data, &lrq->replyAddress, 1); return 0; } /****************************************************************************/ -static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationConfirm *lcf) { @@ -1603,7 +1603,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, +static int process_irr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, InfoRequestResponse *irr) { @@ -1615,7 +1615,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(pskb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, data, &irr->rasAddress, 1); if (ret < 0) return -1; @@ -1623,7 +1623,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, irr->callSignalAddress.item, irr->callSignalAddress.count); if (ret < 0) @@ -1634,40 +1634,40 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, +static int process_ras(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RasMessage *ras) { switch (ras->choice) { case eRasMessage_gatekeeperRequest: - return process_grq(pskb, ct, ctinfo, data, + return process_grq(skb, ct, ctinfo, data, &ras->gatekeeperRequest); case eRasMessage_gatekeeperConfirm: - return process_gcf(pskb, ct, ctinfo, data, + return process_gcf(skb, ct, ctinfo, data, &ras->gatekeeperConfirm); case eRasMessage_registrationRequest: - return process_rrq(pskb, ct, ctinfo, data, + return process_rrq(skb, ct, ctinfo, data, &ras->registrationRequest); case eRasMessage_registrationConfirm: - return process_rcf(pskb, ct, ctinfo, data, + return process_rcf(skb, ct, ctinfo, data, &ras->registrationConfirm); case eRasMessage_unregistrationRequest: - return process_urq(pskb, ct, ctinfo, data, + return process_urq(skb, ct, ctinfo, data, &ras->unregistrationRequest); case eRasMessage_admissionRequest: - return process_arq(pskb, ct, ctinfo, data, + return process_arq(skb, ct, ctinfo, data, &ras->admissionRequest); case eRasMessage_admissionConfirm: - return process_acf(pskb, ct, ctinfo, data, + return process_acf(skb, ct, ctinfo, data, &ras->admissionConfirm); case eRasMessage_locationRequest: - return process_lrq(pskb, ct, ctinfo, data, + return process_lrq(skb, ct, ctinfo, data, &ras->locationRequest); case eRasMessage_locationConfirm: - return process_lcf(pskb, ct, ctinfo, data, + return process_lcf(skb, ct, ctinfo, data, &ras->locationConfirm); case eRasMessage_infoRequestResponse: - return process_irr(pskb, ct, ctinfo, data, + return process_irr(skb, ct, ctinfo, data, &ras->infoRequestResponse); default: pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); @@ -1678,7 +1678,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int ras_help(struct sk_buff **pskb, unsigned int protoff, +static int ras_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static RasMessage ras; @@ -1686,12 +1686,12 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, int datalen = 0; int ret; - pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_ras: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Get UDP data */ - data = get_udp_data(pskb, protoff, &datalen); + data = get_udp_data(skb, protoff, &datalen); if (data == NULL) goto accept; pr_debug("nf_ct_ras: RAS message len=%d ", datalen); @@ -1707,7 +1707,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, } /* Process RAS message */ - if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0) + if (process_ras(skb, ct, ctinfo, &data, &ras) < 0) goto drop; accept: diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1562ca97a349..dfaed4ba83cd 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -30,7 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300; static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); -unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -89,7 +89,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip, return 0; } -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; @@ -116,22 +116,22 @@ static int help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; /* Not a full tcp header? */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? */ dataoff = protoff + th->doff*4; - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; spin_lock_bh(&irc_buffer_lock); - ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff, + ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff, irc_buffer); BUG_ON(ib_ptr == NULL); data = ib_ptr; - data_limit = ib_ptr + (*pskb)->len - dataoff; + data_limit = ib_ptr + skb->len - dataoff; /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ @@ -143,7 +143,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, data += 5; /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); @@ -193,7 +193,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && ct->status & IPS_NAT_MASK) - ret = nf_nat_irc(pskb, ctinfo, + ret = nf_nat_irc(skb, ctinfo, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 1d59fabeb5f7..9810d81e2a06 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -42,17 +42,17 @@ static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { struct nf_conntrack_expect *exp; - struct iphdr *iph = ip_hdr(*pskb); - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = (struct rtable *)skb->dst; struct in_device *in_dev; __be32 mask = 0; /* we're only interested in locally generated packets */ - if ((*pskb)->sk == NULL) + if (skb->sk == NULL) goto out; if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) goto out; @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, nf_ct_expect_related(exp); nf_ct_expect_put(exp); - nf_ct_refresh(ct, *pskb, timeout * HZ); + nf_ct_refresh(ct, skb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index b0804199ab59..099b6df3e2b5 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -41,14 +41,14 @@ MODULE_ALIAS("ip_conntrack_pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); int -(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); int -(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; @@ -254,7 +254,7 @@ out_unexpect_orig: } static inline int -pptp_inbound_pkt(struct sk_buff **pskb, +pptp_inbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -367,7 +367,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound); if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -380,7 +380,7 @@ invalid: } static inline int -pptp_outbound_pkt(struct sk_buff **pskb, +pptp_outbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -462,7 +462,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -492,7 +492,7 @@ static const unsigned int pptp_msg_size[] = { /* track caller id inside control connection, call expect_related */ static int -conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, +conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -502,7 +502,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, struct pptp_pkt_hdr _pptph, *pptph; struct PptpControlHeader _ctlh, *ctlh; union pptp_ctrl_union _pptpReq, *pptpReq; - unsigned int tcplen = (*pskb)->len - protoff; + unsigned int tcplen = skb->len - protoff; unsigned int datalen, reqlen, nexthdr_off; int oldsstate, oldcstate; int ret; @@ -514,12 +514,12 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; nexthdr_off = protoff; - tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { pr_debug("no full PPTP header, can't track\n"); return NF_ACCEPT; @@ -534,7 +534,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; } - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) return NF_ACCEPT; nexthdr_off += sizeof(_ctlh); @@ -547,7 +547,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, if (reqlen > sizeof(*pptpReq)) reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq); if (!pptpReq) return NF_ACCEPT; @@ -560,11 +560,11 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct, ctinfo); pr_debug("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 355d371bac93..b5a16c6e21c2 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -56,7 +56,7 @@ struct sane_reply_net_start { /* other fields aren't interesting for conntrack */ }; -static int help(struct sk_buff **pskb, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -80,19 +80,19 @@ static int help(struct sk_buff **pskb, return NF_ACCEPT; /* Not a full tcp header? */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? */ dataoff = protoff + th->doff * 4; - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; spin_lock_bh(&nf_sane_lock); - sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer); + sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer); BUG_ON(sb_ptr == NULL); if (dir == IP_CT_DIR_ORIGINAL) { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d449fa47491c..8f8b5a48df38 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -36,13 +36,13 @@ static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT; module_param(sip_timeout, uint, 0600); MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); -unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) __read_mostly; @@ -363,7 +363,7 @@ int ct_sip_get_info(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(ct_sip_get_info); -static int set_expected_rtp(struct sk_buff **pskb, +static int set_expected_rtp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, union nf_conntrack_address *addr, @@ -385,7 +385,7 @@ static int set_expected_rtp(struct sk_buff **pskb, nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(pskb, ctinfo, exp, dptr); + ret = nf_nat_sdp(skb, ctinfo, exp, dptr); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -397,7 +397,7 @@ static int set_expected_rtp(struct sk_buff **pskb, return ret; } -static int sip_help(struct sk_buff **pskb, +static int sip_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -414,13 +414,13 @@ static int sip_help(struct sk_buff **pskb, /* No Data ? */ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; - nf_ct_refresh(ct, *pskb, sip_timeout * HZ); + nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (!skb_is_nonlinear(*pskb)) - dptr = (*pskb)->data + dataoff; + if (!skb_is_nonlinear(skb)) + dptr = skb->data + dataoff; else { pr_debug("Copy of skbuff not supported yet.\n"); goto out; @@ -428,13 +428,13 @@ static int sip_help(struct sk_buff **pskb, nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && ct->status & IPS_NAT_MASK) { - if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) { + if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) { ret = NF_DROP; goto out; } } - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; if (datalen < sizeof("SIP/2.0 200") - 1) goto out; @@ -464,7 +464,7 @@ static int sip_help(struct sk_buff **pskb, ret = NF_DROP; goto out; } - ret = set_expected_rtp(pskb, ct, ctinfo, &addr, + ret = set_expected_rtp(skb, ct, ctinfo, &addr, htons(port), dptr); } } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index cc19506cf2f8..e894aa1ff3ad 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -29,12 +29,12 @@ static int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); -static int tftp_help(struct sk_buff **pskb, +static int tftp_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -46,7 +46,7 @@ static int tftp_help(struct sk_buff **pskb, int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; typeof(nf_nat_tftp_hook) nf_nat_tftp; - tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr), + tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr), sizeof(_tftph), &_tftph); if (tfh == NULL) return NF_ACCEPT; @@ -70,7 +70,7 @@ static int tftp_help(struct sk_buff **pskb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_tftp(pskb, ctinfo, exp); + ret = nf_nat_tftp(skb, ctinfo, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 0df7fff196a7..196269c1e586 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -14,7 +14,7 @@ /* core.c */ extern unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, + struct sk_buff *skb, int hook, const struct net_device *indev, const struct net_device *outdev, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a481a349f7bf..0cef1433d660 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -256,14 +256,14 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(info->pf); - if (!afinfo || afinfo->reroute(&skb, info) < 0) + if (!afinfo || afinfo->reroute(skb, info) < 0) verdict = NF_DROP; } if (verdict == NF_ACCEPT) { next_hook: verdict = nf_iterate(&nf_hooks[info->pf][info->hook], - &skb, info->hook, + skb, info->hook, info->indev, info->outdev, &elem, info->okfn, INT_MIN); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 49f0480afe09..3ceeffcf6f9d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -617,6 +617,7 @@ static int nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) { int diff; + int err; diff = data_len - e->skb->len; if (diff < 0) { @@ -626,25 +627,18 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); - return -ENOMEM; + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, data_len)) + if (!skb_make_writable(e->skb, data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, data, data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 07a1b9665005..77eeae658d42 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -36,7 +36,7 @@ target(struct sk_buff **pskb, { const struct xt_classify_target_info *clinfo = targinfo; - (*pskb)->priority = clinfo->priority; + skb->priority = clinfo->priority; return XT_CONTINUE; } diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 7043c2757e09..8cc324b159e9 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -34,7 +34,7 @@ MODULE_ALIAS("ip6t_CONNMARK"); #include <net/netfilter/nf_conntrack_ecache.h> static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -48,28 +48,28 @@ target(struct sk_buff **pskb, u_int32_t mark; u_int32_t newmark; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct) { switch(markinfo->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, skb); } break; case XT_CONNMARK_SAVE: newmark = (ct->mark & ~markinfo->mask) | - ((*pskb)->mark & markinfo->mask); + (skb->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, skb); } break; case XT_CONNMARK_RESTORE: - mark = (*pskb)->mark; + mark = skb->mark; diff = (ct->mark ^ mark) & markinfo->mask; - (*pskb)->mark = mark ^ diff; + skb->mark = mark ^ diff; break; } } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 63d73138c1b9..021b5c8d20e2 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -61,12 +61,11 @@ static void secmark_restore(struct sk_buff *skb) } } -static unsigned int target(struct sk_buff **pskb, const struct net_device *in, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; switch (info->mode) { diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 798ab731009d..6322a933ab71 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -25,7 +25,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_DSCP"); MODULE_ALIAS("ip6t_DSCP"); -static unsigned int target(struct sk_buff **pskb, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -33,20 +33,20 @@ static unsigned int target(struct sk_buff **pskb, const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK), + ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; } -static unsigned int target6(struct sk_buff **pskb, +static unsigned int target6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -54,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb, const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) return NF_DROP; - ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK), + ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index f30fe0baf7de..bc6503d77d75 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); static unsigned int -target_v0(struct sk_buff **pskb, +target_v0(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -31,12 +31,12 @@ target_v0(struct sk_buff **pskb, { const struct xt_mark_target_info *markinfo = targinfo; - (*pskb)->mark = markinfo->mark; + skb->mark = markinfo->mark; return XT_CONTINUE; } static unsigned int -target_v1(struct sk_buff **pskb, +target_v1(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -52,15 +52,15 @@ target_v1(struct sk_buff **pskb, break; case XT_MARK_AND: - mark = (*pskb)->mark & markinfo->mark; + mark = skb->mark & markinfo->mark; break; case XT_MARK_OR: - mark = (*pskb)->mark | markinfo->mark; + mark = skb->mark | markinfo->mark; break; } - (*pskb)->mark = mark; + skb->mark = mark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index d3594c7ccb26..9fb449ffbf8b 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -20,7 +20,7 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_target(struct sk_buff **pskb, +nflog_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -33,7 +33,7 @@ nflog_target(struct sk_buff **pskb, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(target->family, hooknum, *pskb, in, out, &li, + nf_log_packet(target->family, hooknum, skb, in, out, &li, "%s", info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 13f59f3e8c38..c3984e9f766a 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index fec1aefb1c32..4976ce186615 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -12,7 +12,7 @@ MODULE_ALIAS("ipt_NOTRACK"); MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -20,16 +20,16 @@ target(struct sk_buff **pskb, const void *targinfo) { /* Previously seen (loopback)? Ignore. */ - if ((*pskb)->nfct != NULL) + if (skb->nfct != NULL) return XT_CONTINUE; /* Attach fake conntrack entry. If there is a real ct entry correspondig to this packet, it'll hang aroun till timing out. We don't deal with it for performance reasons. JK */ - (*pskb)->nfct = &nf_conntrack_untracked.ct_general; - (*pskb)->nfctinfo = IP_CT_NEW; - nf_conntrack_get((*pskb)->nfct); + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); return XT_CONTINUE; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c83779a941a1..235806eb6ecd 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; -static unsigned int target(struct sk_buff **pskb, const struct net_device *in, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -47,7 +47,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, BUG(); } - (*pskb)->secmark = secmark; + skb->secmark = secmark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index d40f7e4b1289..07435a602b11 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -39,7 +39,7 @@ optlen(const u_int8_t *opt, unsigned int offset) } static int -tcpmss_mangle_packet(struct sk_buff **pskb, +tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_tcpmss_info *info, unsigned int tcphoff, unsigned int minlen) @@ -50,11 +50,11 @@ tcpmss_mangle_packet(struct sk_buff **pskb, u16 newmss; u8 *opt; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return -1; - tcplen = (*pskb)->len - tcphoff; - tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); + tcplen = skb->len - tcphoff; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); /* Since it passed flags test in tcp match, we know it is is not a fragment, and has data >= tcp header length. SYN @@ -64,19 +64,19 @@ tcpmss_mangle_packet(struct sk_buff **pskb, if (tcplen != tcph->doff*4) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", - (*pskb)->len); + skb->len); return -1; } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu((*pskb)->dst) <= minlen) { + if (dst_mtu(skb->dst) <= minlen) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: " "unknown or invalid path-MTU (%u)\n", - dst_mtu((*pskb)->dst)); + dst_mtu(skb->dst)); return -1; } - newmss = dst_mtu((*pskb)->dst) - minlen; + newmss = dst_mtu(skb->dst) - minlen; } else newmss = info->mss; @@ -95,7 +95,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb, opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = newmss & 0x00ff; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), 0); return 0; } @@ -104,57 +104,53 @@ tcpmss_mangle_packet(struct sk_buff **pskb, /* * MSS Option not found ?! add it.. */ - if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), - TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) + if (skb_tailroom(skb) < TCPOLEN_MSS) { + if (pskb_expand_head(skb, 0, + TCPOLEN_MSS - skb_tailroom(skb), + GFP_ATOMIC)) return -1; - kfree_skb(*pskb); - *pskb = newskb; - tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); } - skb_put((*pskb), TCPOLEN_MSS); + skb_put(skb, TCPOLEN_MSS); opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); + nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, oldval, ((__be16 *)tcph)[6], 0); return TCPOLEN_MSS; } static unsigned int -xt_tcpmss_target4(struct sk_buff **pskb, +xt_tcpmss_target4(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); __be16 newlen; int ret; - ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4, + ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); newlen = htons(ntohs(iph->tot_len) + ret); nf_csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; @@ -164,30 +160,30 @@ xt_tcpmss_target4(struct sk_buff **pskb, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static unsigned int -xt_tcpmss_target6(struct sk_buff **pskb, +xt_tcpmss_target6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct ipv6hdr *ipv6h = ipv6_hdr(*pskb); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; int tcphoff; int ret; nexthdr = ipv6h->nexthdr; - tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr); + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) { WARN_ON(1); return NF_DROP; } - ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff, + ret = tcpmss_mangle_packet(skb, targinfo, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - ipv6h = ipv6_hdr(*pskb); + ipv6h = ipv6_hdr(skb); ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret); } return XT_CONTINUE; diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index 4df2dedcc0b5..26c5d08ab2c2 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -10,14 +10,14 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - (*pskb)->nf_trace = 1; + skb->nf_trace = 1; return XT_CONTINUE; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c776bcd9f825..98e313e5e594 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1378,6 +1378,8 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; nl_table[unit].registered = 1; + } else { + kfree(listeners); } netlink_table_ungrab(); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 6b407ece953c..fa006e06ce33 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -202,11 +202,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed from earlier kernels */ - - /* iptables targets take a double skb pointer in case the skb - * needs to be replaced. We don't own the skb, so this must not - * happen. The pskb_expand_head above should make sure of this */ - ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, + ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL, ipt->tcfi_hook, ipt->tcfi_t->u.kernel.target, ipt->tcfi_t->data); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 2d32fd27496e..3f8335e6ea2e 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -205,20 +205,19 @@ static unsigned int ingress_drop(struct Qdisc *sch) #ifndef CONFIG_NET_CLS_ACT #ifdef CONFIG_NETFILTER static unsigned int -ing_hook(unsigned int hook, struct sk_buff **pskb, +ing_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *indev, const struct net_device *outdev, int (*okfn)(struct sk_buff *)) { struct Qdisc *q; - struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; int fwres=NF_ACCEPT; DPRINTK("ing_hook: skb %s dev=%s len=%u\n", skb->sk ? "(owned)" : "(unowned)", - skb->dev ? (*pskb)->dev->name : "(no dev)", + skb->dev ? skb->dev->name : "(no dev)", skb->len); if (dev->qdisc_ingress) { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 9de3ddaa2768..eb4deaf58914 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -954,9 +954,9 @@ static struct inet_protosw sctpv6_stream_protosw = { .flags = SCTP_PROTOSW_FLAG, }; -static int sctp6_rcv(struct sk_buff **pskb) +static int sctp6_rcv(struct sk_buff *skb) { - return sctp_rcv(*pskb) ? -1 : 0; + return sctp_rcv(skb) ? -1 : 0; } static struct inet6_protocol sctpv6_protocol = { diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 8ebfc4db7f51..5c69a725e530 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 42b3220bed39..8bd074df27d3 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) { u8 *ptr; u8 pad; - int len = buf->len; + size_t len = buf->len; if (len <= buf->head[0].iov_len) { pad = *(u8 *)(buf->head[0].iov_base + len - 1); @@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) } else len -= buf->head[0].iov_len; if (len <= buf->page_len) { - int last = (buf->page_base + len - 1) + unsigned int last = (buf->page_base + len - 1) >>PAGE_CACHE_SHIFT; - int offset = (buf->page_base + len - 1) + unsigned int offset = (buf->page_base + len - 1) & (PAGE_CACHE_SIZE - 1); ptr = kmap_atomic(buf->pages[last], KM_USER0); pad = *(ptr + offset); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 52429b1ffcc1..76be83ee4b04 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -127,7 +127,14 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; - int len; + size_t len; + + /* sanity check the name before trying to print it */ + err = -EINVAL; + len = strlen(servname); + if (len > RPC_MAXNETNAMELEN) + goto out_no_rpciod; + len++; dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); @@ -148,7 +155,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; - len = strlen(servname) + 1; if (len > sizeof(clnt->cl_inline_name)) { char *buf = kmalloc(len, GFP_KERNEL); if (buf != 0) @@ -234,8 +240,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; - struct rpc_xprtsock_create xprtargs = { - .proto = args->protocol, + struct xprt_create xprtargs = { + .ident = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, @@ -253,7 +259,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) */ if (args->servername == NULL) { struct sockaddr_in *addr = - (struct sockaddr_in *) &args->address; + (struct sockaddr_in *) args->address; snprintf(servername, sizeof(servername), NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); args->servername = servername; @@ -269,9 +275,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - dprintk("RPC: creating %s client for %s (xprt %p)\n", - args->program->name, args->servername, xprt); - clnt = rpc_new_client(xprt, args->servername, args->program, args->version, args->authflavor); if (IS_ERR(clnt)) @@ -439,7 +442,7 @@ rpc_release_client(struct rpc_clnt *clnt) */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, struct rpc_program *program, - int vers) + u32 vers) { struct rpc_clnt *clnt; struct rpc_version *version; @@ -843,8 +846,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); if (RPC_IS_ASYNC(task) || !signalled()) { - xprt_release(task); - task->tk_action = call_reserve; + task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; } @@ -871,6 +873,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; buf->page_len = 0; + buf->flags = 0; buf->len = 0; buf->buflen = len; } @@ -937,7 +940,7 @@ call_bind(struct rpc_task *task) static void call_bind_status(struct rpc_task *task) { - int status = -EACCES; + int status = -EIO; if (task->tk_status >= 0) { dprint_status(task); @@ -947,9 +950,20 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { + case -EAGAIN: + dprintk("RPC: %5u rpcbind waiting for another request " + "to finish\n", task->tk_pid); + /* avoid busy-waiting here -- could be a network outage. */ + rpc_delay(task, 5*HZ); + goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " "unavailable\n", task->tk_pid); + /* fail immediately if this is an RPC ping */ + if (task->tk_msg.rpc_proc->p_proc == 0) { + status = -EOPNOTSUPP; + break; + } rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: @@ -957,6 +971,7 @@ call_bind_status(struct rpc_task *task) task->tk_pid); goto retry_timeout; case -EPFNOSUPPORT: + /* server doesn't support any rpcbind version we know of */ dprintk("RPC: %5u remote rpcbind service unavailable\n", task->tk_pid); break; @@ -969,7 +984,6 @@ call_bind_status(struct rpc_task *task) default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); - status = -EIO; } rpc_exit(task, status); @@ -1257,7 +1271,6 @@ call_refresh(struct rpc_task *task) { dprint_status(task); - xprt_release(task); /* Must do to obtain new XID */ task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1375,6 +1388,8 @@ call_verify(struct rpc_task *task) dprintk("RPC: %5u %s: retry stale creds\n", task->tk_pid, __FUNCTION__); rpcauth_invalcred(task); + /* Ensure we obtain a new XID! */ + xprt_release(task); task->tk_action = call_refresh; goto out_retry; case RPC_AUTH_BADCRED: @@ -1523,13 +1538,18 @@ void rpc_show_tasks(void) spin_lock(&clnt->cl_lock); list_for_each_entry(t, &clnt->cl_tasks, tk_task) { const char *rpc_waitq = "none"; + int proc; + + if (t->tk_msg.rpc_proc) + proc = t->tk_msg.rpc_proc->p_proc; + else + proc = -1; if (RPC_IS_QUEUED(t)) rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), + t->tk_pid, proc, t->tk_flags, t->tk_status, t->tk_client, (t->tk_client ? t->tk_client->cl_prog : 0), diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 669e12a4ed18..c8433e8865aa 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -14,7 +14,7 @@ #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/dnotify.h> +#include <linux/fsnotify.h> #include <linux/kernel.h> #include <asm/ioctls.h> @@ -329,6 +329,7 @@ rpc_show_info(struct seq_file *m, void *v) clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); + seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); return 0; } @@ -585,6 +586,7 @@ rpc_populate(struct dentry *parent, if (S_ISDIR(mode)) inc_nlink(dir); d_add(dentry, inode); + fsnotify_create(dir, dentry); } mutex_unlock(&dir->i_mutex); return 0; @@ -606,7 +608,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); inc_nlink(dir); - inode_dir_notify(dir, DN_CREATE); + fsnotify_mkdir(dir, dentry); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", @@ -748,7 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->flags = flags; rpci->ops = ops; rpci->nkern_readwriters = 1; - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry); dget(dentry); out: mutex_unlock(&dir->i_mutex); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d1740dbab991..a05493aedb68 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,11 +16,14 @@ #include <linux/types.h> #include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xprtsock.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_BIND @@ -91,26 +94,6 @@ enum { #define RPCB_MAXADDRLEN (128u) /* - * r_netid - * - * Quoting RFC 3530, section 2.2: - * - * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP - * over IPv4 the value of r_netid is the string "udp". - * - * ... - * - * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP - * over IPv6 the value of r_netid is the string "udp6". - */ -#define RPCB_NETID_UDP "\165\144\160" /* "udp" */ -#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ -#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ -#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ - -#define RPCB_MAXNETIDLEN (4u) - -/* * r_owner * * The "owner" is allowed to unset a service in the rpcbind database. @@ -120,7 +103,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); -extern struct rpc_program rpcb_program; +static struct rpc_program rpcb_program; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -137,10 +120,13 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; -static struct rpcb_info { +struct rpcb_info { int rpc_vers; struct rpc_procinfo * rpc_proc; -} rpcb_next_version[]; +}; + +static struct rpcb_info rpcb_next_version[]; +static struct rpcb_info rpcb_next_version6[]; static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) { @@ -190,7 +176,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, RPC_CLNT_CREATE_INTR), }; - ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + switch (srvaddr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + break; + case AF_INET6: + ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); + break; + default: + return NULL; + } + if (!privileged) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); @@ -234,7 +230,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) prog, vers, prot, port); rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - IPPROTO_UDP, 2, 1); + XPRT_TRANSPORT_UDP, 2, 1); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -316,6 +312,7 @@ void rpcb_getport_async(struct rpc_task *task) struct rpc_task *child; struct sockaddr addr; int status; + struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __FUNCTION__, @@ -325,7 +322,7 @@ void rpcb_getport_async(struct rpc_task *task) BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { - status = -EACCES; /* tell caller to check again */ + status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __FUNCTION__); goto bailout_nowake; @@ -343,18 +340,43 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + + /* Don't ever use rpcbind v2 for AF_INET6 requests */ + switch (addr.sa_family) { + case AF_INET: + info = rpcb_next_version; + break; + case AF_INET6: + info = rpcb_next_version6; + break; + default: + status = -EAFNOSUPPORT; + dprintk("RPC: %5u %s: bad address family\n", + task->tk_pid, __FUNCTION__); + goto bailout_nofree; + } + if (info[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; - status = -EACCES; /* tell caller to try again later */ + status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } - bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; + bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, + bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); + goto bailout_nofree; + } + map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; @@ -367,28 +389,19 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : - RPCB_NETID_UDP; - memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), - sizeof(map->r_addr)); + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + memcpy(map->r_addr, + rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), + sizeof(map->r_addr)); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); - rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); - if (IS_ERR(rpcb_clnt)) { - status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); - goto bailout; - } - child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __FUNCTION__); - goto bailout_nofree; + goto bailout; } rpc_put_task(child); @@ -403,6 +416,7 @@ bailout_nofree: bailout_nowake: task->tk_status = status; } +EXPORT_SYMBOL_GPL(rpcb_getport_async); /* * Rpcbind child task calls this callback via tk_exit. @@ -413,6 +427,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) struct rpc_xprt *xprt = map->r_xprt; int status = child->tk_status; + /* Garbage reply: retry with a lesser rpcbind version */ + if (status == -EIO) + status = -EPROTONOSUPPORT; + /* rpcbind server doesn't support this rpcbind protocol version */ if (status == -EPROTONOSUPPORT) xprt->bind_index++; @@ -490,16 +508,24 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { char *addr; - int addr_len, c, i, f, first, val; + u32 addr_len; + int c, i, f, first, val; *portp = 0; - addr_len = (unsigned int) ntohl(*p++); - if (addr_len > RPCB_MAXADDRLEN) /* sanity */ - return -EINVAL; - - dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n", - (char *) p); - + addr_len = ntohl(*p++); + + /* + * Simple sanity check. The smallest possible universal + * address is an IPv4 address string containing 11 bytes. + */ + if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN) + goto out_err; + + /* + * Start at the end and walk backwards until the first dot + * is encountered. When the second dot is found, we have + * both parts of the port number. + */ addr = (char *)p; val = 0; first = 1; @@ -521,8 +547,19 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, } } + /* + * Simple sanity check. If we never saw a dot in the reply, + * then this was probably just garbage. + */ + if (first) + goto out_err; + dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; + +out_err: + dprintk("RPC: rpcbind server returned malformed reply\n"); + return -EIO; } #define RPCB_program_sz (1u) @@ -531,7 +568,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, #define RPCB_port_sz (1u) #define RPCB_boolean_sz (1u) -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) #define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) @@ -593,6 +630,14 @@ static struct rpcb_info rpcb_next_version[] = { { 0, NULL }, }; +static struct rpcb_info rpcb_next_version6[] = { +#ifdef CONFIG_SUNRPC_BIND34 + { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, + { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, +#endif + { 0, NULL }, +}; + static struct rpc_version rpcb_version2 = { .number = 2, .nrprocs = RPCB_HIGHPROC_2, @@ -621,7 +666,7 @@ static struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -struct rpc_program rpcb_program = { +static struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 954d7ec86c7e..3c773c53e12e 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -777,6 +777,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size) task->tk_pid, size, buf); return &buf->data; } +EXPORT_SYMBOL_GPL(rpc_malloc); /** * rpc_free - free buffer allocated via rpc_malloc @@ -802,6 +803,7 @@ void rpc_free(void *buffer) else kfree(buf); } +EXPORT_SYMBOL_GPL(rpc_free); /* * Creation and deletion of RPC task structures diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 1d377d1ab7f4..97ac45f034d6 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -34,6 +34,7 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) desc->offset += len; return len; } +EXPORT_SYMBOL_GPL(xdr_skb_read_bits); /** * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer @@ -137,6 +138,7 @@ copy_tail: out: return copied; } +EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb); /** * csum_partial_copy_to_xdr - checksum and copy data @@ -179,3 +181,4 @@ no_checksum: return -1; return 0; } +EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 384c4ad5ab86..33d89e842c85 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -20,7 +20,7 @@ #include <linux/sunrpc/auth.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> - +#include <linux/sunrpc/xprtsock.h> /* RPC scheduler */ EXPORT_SYMBOL(rpc_execute); diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index 8142fdb8a930..31becbf09263 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/unistd.h> +#include <linux/module.h> #include <linux/sunrpc/clnt.h> @@ -40,6 +41,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) rt->ntimeouts[i] = 0; } } +EXPORT_SYMBOL_GPL(rpc_init_rtt); /* * NB: When computing the smoothed RTT and standard deviation, @@ -75,6 +77,7 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) if (*sdrtt < RPC_RTO_MIN) *sdrtt = RPC_RTO_MIN; } +EXPORT_SYMBOL_GPL(rpc_update_rtt); /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. @@ -103,3 +106,4 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) return res; } +EXPORT_SYMBOL_GPL(rpc_calc_rto); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c8c2edccad7e..282a9a2ec90c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,6 +62,9 @@ static inline void do_xprt_reserve(struct rpc_task *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(xprt_list); + /* * The transport code maintains an estimate on the maximum number of out- * standing RPC requests, using a smoothed version of the congestion @@ -81,6 +84,78 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) /** + * xprt_register_transport - register a transport implementation + * @transport: transport to register + * + * If a transport implementation is loaded as a kernel module, it can + * call this interface to make itself known to the RPC client. + * + * Returns: + * 0: transport successfully registered + * -EEXIST: transport already registered + * -EINVAL: transport module being unloaded + */ +int xprt_register_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = -EEXIST; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + /* don't register the same transport class twice */ + if (t->ident == transport->ident) + goto out; + } + + result = -EINVAL; + if (try_module_get(THIS_MODULE)) { + list_add_tail(&transport->list, &xprt_list); + printk(KERN_INFO "RPC: Registered %s transport module.\n", + transport->name); + result = 0; + } + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_register_transport); + +/** + * xprt_unregister_transport - unregister a transport implementation + * transport: transport to unregister + * + * Returns: + * 0: transport successfully unregistered + * -ENOENT: transport never registered + */ +int xprt_unregister_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t == transport) { + printk(KERN_INFO + "RPC: Unregistered %s transport module.\n", + transport->name); + list_del_init(&transport->list); + module_put(THIS_MODULE); + goto out; + } + } + result = -ENOENT; + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_unregister_transport); + +/** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport * @@ -118,6 +193,7 @@ out_sleep: rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } +EXPORT_SYMBOL_GPL(xprt_reserve_xprt); static void xprt_clear_locked(struct rpc_xprt *xprt) { @@ -167,6 +243,7 @@ out_sleep: rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } +EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -246,6 +323,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next(xprt); } } +EXPORT_SYMBOL_GPL(xprt_release_xprt); /** * xprt_release_xprt_cong - allow other requests to use a transport @@ -262,6 +340,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next_cong(xprt); } } +EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -314,6 +393,7 @@ void xprt_release_rqst_cong(struct rpc_task *task) { __xprt_put_cong(task->tk_xprt, task->tk_rqstp); } +EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); /** * xprt_adjust_cwnd - adjust transport congestion window @@ -345,6 +425,7 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result) xprt->cwnd = cwnd; __xprt_put_cong(xprt, req); } +EXPORT_SYMBOL_GPL(xprt_adjust_cwnd); /** * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue @@ -359,6 +440,7 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) else rpc_wake_up(&xprt->pending); } +EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); /** * xprt_wait_for_buffer_space - wait for transport output buffer to clear @@ -373,6 +455,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task) task->tk_timeout = req->rq_timeout; rpc_sleep_on(&xprt->pending, task, NULL, NULL); } +EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); /** * xprt_write_space - wake the task waiting for transport output buffer space @@ -393,6 +476,7 @@ void xprt_write_space(struct rpc_xprt *xprt) } spin_unlock_bh(&xprt->transport_lock); } +EXPORT_SYMBOL_GPL(xprt_write_space); /** * xprt_set_retrans_timeout_def - set a request's retransmit timeout @@ -406,6 +490,7 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task) { task->tk_timeout = task->tk_rqstp->rq_timeout; } +EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); /* * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout @@ -425,6 +510,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task) if (task->tk_timeout > max_timeout || task->tk_timeout == 0) task->tk_timeout = max_timeout; } +EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); static void xprt_reset_majortimeo(struct rpc_rqst *req) { @@ -500,6 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt) xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } +EXPORT_SYMBOL_GPL(xprt_disconnect); static void xprt_init_autodisconnect(unsigned long data) @@ -610,6 +697,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) xprt->stat.bad_xids++; return NULL; } +EXPORT_SYMBOL_GPL(xprt_lookup_rqst); /** * xprt_update_rtt - update an RPC client's RTT state after receiving a reply @@ -629,6 +717,7 @@ void xprt_update_rtt(struct rpc_task *task) rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } } +EXPORT_SYMBOL_GPL(xprt_update_rtt); /** * xprt_complete_rqst - called when reply processing is complete @@ -653,6 +742,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) req->rq_received = req->rq_private_buf.len = copied; rpc_wake_up_task(task); } +EXPORT_SYMBOL_GPL(xprt_complete_rqst); static void xprt_timer(struct rpc_task *task) { @@ -889,23 +979,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) +struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; + struct xprt_class *t; - switch (args->proto) { - case IPPROTO_UDP: - xprt = xs_setup_udp(args); - break; - case IPPROTO_TCP: - xprt = xs_setup_tcp(args); - break; - default: - printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - args->proto); - return ERR_PTR(-EIO); + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t->ident == args->ident) { + spin_unlock(&xprt_list_lock); + goto found; + } } + spin_unlock(&xprt_list_lock); + printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + return ERR_PTR(-EIO); + +found: + xprt = t->setup(args); if (IS_ERR(xprt)) { dprintk("RPC: xprt_create_transport: failed, %ld\n", -PTR_ERR(xprt)); diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile new file mode 100644 index 000000000000..264f0feeb513 --- /dev/null +++ b/net/sunrpc/xprtrdma/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o + +xprtrdma-y := transport.o rpc_rdma.o verbs.o diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c new file mode 100644 index 000000000000..12db63580427 --- /dev/null +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -0,0 +1,868 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * rpc_rdma.c + * + * This file contains the guts of the RPC RDMA protocol, and + * does marshaling/unmarshaling, etc. It is also where interfacing + * to the Linux RPC framework lives. + */ + +#include "xprt_rdma.h" + +#include <linux/highmem.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +enum rpcrdma_chunktype { + rpcrdma_noch = 0, + rpcrdma_readch, + rpcrdma_areadch, + rpcrdma_writech, + rpcrdma_replych +}; + +#ifdef RPC_DEBUG +static const char transfertypes[][12] = { + "pure inline", /* no chunks */ + " read chunk", /* some argument via rdma read */ + "*read chunk", /* entire request via rdma read */ + "write chunk", /* some result via rdma write */ + "reply chunk" /* entire reply via rdma write */ +}; +#endif + +/* + * Chunk assembly from upper layer xdr_buf. + * + * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk + * elements. Segments are then coalesced when registered, if possible + * within the selected memreg mode. + * + * Note, this routine is never called if the connection's memory + * registration strategy is 0 (bounce buffers). + */ + +static int +rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos, + enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) +{ + int len, n = 0, p; + + if (pos == 0 && xdrbuf->head[0].iov_len) { + seg[n].mr_page = NULL; + seg[n].mr_offset = xdrbuf->head[0].iov_base; + seg[n].mr_len = xdrbuf->head[0].iov_len; + pos += xdrbuf->head[0].iov_len; + ++n; + } + + if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { + if (n == nsegs) + return 0; + seg[n].mr_page = xdrbuf->pages[0]; + seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; + seg[n].mr_len = min_t(u32, + PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); + len = xdrbuf->page_len - seg[n].mr_len; + pos += len; + ++n; + p = 1; + while (len > 0) { + if (n == nsegs) + return 0; + seg[n].mr_page = xdrbuf->pages[p]; + seg[n].mr_offset = NULL; + seg[n].mr_len = min_t(u32, PAGE_SIZE, len); + len -= seg[n].mr_len; + ++n; + ++p; + } + } + + if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) { + if (n == nsegs) + return 0; + seg[n].mr_page = NULL; + seg[n].mr_offset = xdrbuf->tail[0].iov_base; + seg[n].mr_len = xdrbuf->tail[0].iov_len; + pos += xdrbuf->tail[0].iov_len; + ++n; + } + + if (pos < xdrbuf->len) + dprintk("RPC: %s: marshaled only %d of %d\n", + __func__, pos, xdrbuf->len); + + return n; +} + +/* + * Create read/write chunk lists, and reply chunks, for RDMA + * + * Assume check against THRESHOLD has been done, and chunks are required. + * Assume only encoding one list entry for read|write chunks. The NFSv3 + * protocol is simple enough to allow this as it only has a single "bulk + * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The + * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.) + * + * When used for a single reply chunk (which is a special write + * chunk used for the entire reply, rather than just the data), it + * is used primarily for READDIR and READLINK which would otherwise + * be severely size-limited by a small rdma inline read max. The server + * response will come back as an RDMA Write, followed by a message + * of type RDMA_NOMSG carrying the xid and length. As a result, reply + * chunks do not provide data alignment, however they do not require + * "fixup" (moving the response to the upper layer buffer) either. + * + * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): + * + * Read chunklist (a linked list): + * N elements, position P (same P for all chunks of same arg!): + * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 + * + * Write chunklist (a list of (one) counted array): + * N elements: + * 1 - N - HLOO - HLOO - ... - HLOO - 0 + * + * Reply chunk (a counted array): + * N elements: + * 1 - N - HLOO - HLOO - ... - HLOO + */ + +static unsigned int +rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) +{ + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); + int nsegs, nchunks = 0; + int pos; + struct rpcrdma_mr_seg *seg = req->rl_segments; + struct rpcrdma_read_chunk *cur_rchunk = NULL; + struct rpcrdma_write_array *warray = NULL; + struct rpcrdma_write_chunk *cur_wchunk = NULL; + u32 *iptr = headerp->rm_body.rm_chunks; + + if (type == rpcrdma_readch || type == rpcrdma_areadch) { + /* a read chunk - server will RDMA Read our memory */ + cur_rchunk = (struct rpcrdma_read_chunk *) iptr; + } else { + /* a write or reply chunk - server will RDMA Write our memory */ + *iptr++ = xdr_zero; /* encode a NULL read chunk list */ + if (type == rpcrdma_replych) + *iptr++ = xdr_zero; /* a NULL write chunk list */ + warray = (struct rpcrdma_write_array *) iptr; + cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1); + } + + if (type == rpcrdma_replych || type == rpcrdma_areadch) + pos = 0; + else + pos = target->head[0].iov_len; + + nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); + if (nsegs == 0) + return 0; + + do { + /* bind/register the memory, then build chunk from result. */ + int n = rpcrdma_register_external(seg, nsegs, + cur_wchunk != NULL, r_xprt); + if (n <= 0) + goto out; + if (cur_rchunk) { /* read */ + cur_rchunk->rc_discrim = xdr_one; + /* all read chunks have the same "position" */ + cur_rchunk->rc_position = htonl(pos); + cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); + cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); + xdr_encode_hyper( + (u32 *)&cur_rchunk->rc_target.rs_offset, + seg->mr_base); + dprintk("RPC: %s: read chunk " + "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__, + seg->mr_len, seg->mr_base, seg->mr_rkey, pos, + n < nsegs ? "more" : "last"); + cur_rchunk++; + r_xprt->rx_stats.read_chunk_count++; + } else { /* write/reply */ + cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); + cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); + xdr_encode_hyper( + (u32 *)&cur_wchunk->wc_target.rs_offset, + seg->mr_base); + dprintk("RPC: %s: %s chunk " + "elem %d@0x%llx:0x%x (%s)\n", __func__, + (type == rpcrdma_replych) ? "reply" : "write", + seg->mr_len, seg->mr_base, seg->mr_rkey, + n < nsegs ? "more" : "last"); + cur_wchunk++; + if (type == rpcrdma_replych) + r_xprt->rx_stats.reply_chunk_count++; + else + r_xprt->rx_stats.write_chunk_count++; + r_xprt->rx_stats.total_rdma_request += seg->mr_len; + } + nchunks++; + seg += n; + nsegs -= n; + } while (nsegs); + + /* success. all failures return above */ + req->rl_nchunks = nchunks; + + BUG_ON(nchunks == 0); + + /* + * finish off header. If write, marshal discrim and nchunks. + */ + if (cur_rchunk) { + iptr = (u32 *) cur_rchunk; + *iptr++ = xdr_zero; /* finish the read chunk list */ + *iptr++ = xdr_zero; /* encode a NULL write chunk list */ + *iptr++ = xdr_zero; /* encode a NULL reply chunk */ + } else { + warray->wc_discrim = xdr_one; + warray->wc_nchunks = htonl(nchunks); + iptr = (u32 *) cur_wchunk; + if (type == rpcrdma_writech) { + *iptr++ = xdr_zero; /* finish the write chunk list */ + *iptr++ = xdr_zero; /* encode a NULL reply chunk */ + } + } + + /* + * Return header size. + */ + return (unsigned char *)iptr - (unsigned char *)headerp; + +out: + for (pos = 0; nchunks--;) + pos += rpcrdma_deregister_external( + &req->rl_segments[pos], r_xprt, NULL); + return 0; +} + +/* + * Copy write data inline. + * This function is used for "small" requests. Data which is passed + * to RPC via iovecs (or page list) is copied directly into the + * pre-registered memory buffer for this request. For small amounts + * of data, this is efficient. The cutoff value is tunable. + */ +static int +rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) +{ + int i, npages, curlen; + int copy_len; + unsigned char *srcp, *destp; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + + destp = rqst->rq_svec[0].iov_base; + curlen = rqst->rq_svec[0].iov_len; + destp += curlen; + /* + * Do optional padding where it makes sense. Alignment of write + * payload can help the server, if our setting is accurate. + */ + pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/); + if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH) + pad = 0; /* don't pad this request */ + + dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n", + __func__, pad, destp, rqst->rq_slen, curlen); + + copy_len = rqst->rq_snd_buf.page_len; + r_xprt->rx_stats.pullup_copy_count += copy_len; + npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; + for (i = 0; copy_len && i < npages; i++) { + if (i == 0) + curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base; + else + curlen = PAGE_SIZE; + if (curlen > copy_len) + curlen = copy_len; + dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", + __func__, i, destp, copy_len, curlen); + srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], + KM_SKB_SUNRPC_DATA); + if (i == 0) + memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen); + else + memcpy(destp, srcp, curlen); + kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); + rqst->rq_svec[0].iov_len += curlen; + destp += curlen; + copy_len -= curlen; + } + if (rqst->rq_snd_buf.tail[0].iov_len) { + curlen = rqst->rq_snd_buf.tail[0].iov_len; + if (destp != rqst->rq_snd_buf.tail[0].iov_base) { + memcpy(destp, + rqst->rq_snd_buf.tail[0].iov_base, curlen); + r_xprt->rx_stats.pullup_copy_count += curlen; + } + dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", + __func__, destp, copy_len, curlen); + rqst->rq_svec[0].iov_len += curlen; + } + /* header now contains entire send message */ + return pad; +} + +/* + * Marshal a request: the primary job of this routine is to choose + * the transfer modes. See comments below. + * + * Uses multiple RDMA IOVs for a request: + * [0] -- RPC RDMA header, which uses memory from the *start* of the + * preregistered buffer that already holds the RPC data in + * its middle. + * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. + * [2] -- optional padding. + * [3] -- if padded, header only in [1] and data here. + */ + +int +rpcrdma_marshal_req(struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + char *base; + size_t hdrlen, rpclen, padlen; + enum rpcrdma_chunktype rtype, wtype; + struct rpcrdma_msg *headerp; + + /* + * rpclen gets amount of data in first buffer, which is the + * pre-registered buffer. + */ + base = rqst->rq_svec[0].iov_base; + rpclen = rqst->rq_svec[0].iov_len; + + /* build RDMA header in private area at front */ + headerp = (struct rpcrdma_msg *) req->rl_base; + /* don't htonl XID, it's already done in request */ + headerp->rm_xid = rqst->rq_xid; + headerp->rm_vers = xdr_one; + headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); + headerp->rm_type = __constant_htonl(RDMA_MSG); + + /* + * Chunks needed for results? + * + * o If the expected result is under the inline threshold, all ops + * return as inline (but see later). + * o Large non-read ops return as a single reply chunk. + * o Large read ops return data as write chunk(s), header as inline. + * + * Note: the NFS code sending down multiple result segments implies + * the op is one of read, readdir[plus], readlink or NFSv4 getacl. + */ + + /* + * This code can handle read chunks, write chunks OR reply + * chunks -- only one type. If the request is too big to fit + * inline, then we will choose read chunks. If the request is + * a READ, then use write chunks to separate the file data + * into pages; otherwise use reply chunks. + */ + if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) + wtype = rpcrdma_noch; + else if (rqst->rq_rcv_buf.page_len == 0) + wtype = rpcrdma_replych; + else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) + wtype = rpcrdma_writech; + else + wtype = rpcrdma_replych; + + /* + * Chunks needed for arguments? + * + * o If the total request is under the inline threshold, all ops + * are sent as inline. + * o Large non-write ops are sent with the entire message as a + * single read chunk (protocol 0-position special case). + * o Large write ops transmit data as read chunk(s), header as + * inline. + * + * Note: the NFS code sending down multiple argument segments + * implies the op is a write. + * TBD check NFSv4 setacl + */ + if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) + rtype = rpcrdma_noch; + else if (rqst->rq_snd_buf.page_len == 0) + rtype = rpcrdma_areadch; + else + rtype = rpcrdma_readch; + + /* The following simplification is not true forever */ + if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) + wtype = rpcrdma_noch; + BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); + + if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && + (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { + /* forced to "pure inline"? */ + dprintk("RPC: %s: too much data (%d/%d) for inline\n", + __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); + return -1; + } + + hdrlen = 28; /*sizeof *headerp;*/ + padlen = 0; + + /* + * Pull up any extra send data into the preregistered buffer. + * When padding is in use and applies to the transfer, insert + * it and change the message type. + */ + if (rtype == rpcrdma_noch) { + + padlen = rpcrdma_inline_pullup(rqst, + RPCRDMA_INLINE_PAD_VALUE(rqst)); + + if (padlen) { + headerp->rm_type = __constant_htonl(RDMA_MSGP); + headerp->rm_body.rm_padded.rm_align = + htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); + headerp->rm_body.rm_padded.rm_thresh = + __constant_htonl(RPCRDMA_INLINE_PAD_THRESH); + headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; + headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; + headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; + hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ + BUG_ON(wtype != rpcrdma_noch); + + } else { + headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; + headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; + headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero; + /* new length after pullup */ + rpclen = rqst->rq_svec[0].iov_len; + /* + * Currently we try to not actually use read inline. + * Reply chunks have the desirable property that + * they land, packed, directly in the target buffers + * without headers, so they require no fixup. The + * additional RDMA Write op sends the same amount + * of data, streams on-the-wire and adds no overhead + * on receive. Therefore, we request a reply chunk + * for non-writes wherever feasible and efficient. + */ + if (wtype == rpcrdma_noch && + r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) + wtype = rpcrdma_replych; + } + } + + /* + * Marshal chunks. This routine will return the header length + * consumed by marshaling. + */ + if (rtype != rpcrdma_noch) { + hdrlen = rpcrdma_create_chunks(rqst, + &rqst->rq_snd_buf, headerp, rtype); + wtype = rtype; /* simplify dprintk */ + + } else if (wtype != rpcrdma_noch) { + hdrlen = rpcrdma_create_chunks(rqst, + &rqst->rq_rcv_buf, headerp, wtype); + } + + if (hdrlen == 0) + return -1; + + dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" + " headerp 0x%p base 0x%p lkey 0x%x\n", + __func__, transfertypes[wtype], hdrlen, rpclen, padlen, + headerp, base, req->rl_iov.lkey); + + /* + * initialize send_iov's - normally only two: rdma chunk header and + * single preregistered RPC header buffer, but if padding is present, + * then use a preregistered (and zeroed) pad buffer between the RPC + * header and any write data. In all non-rdma cases, any following + * data has been copied into the RPC header buffer. + */ + req->rl_send_iov[0].addr = req->rl_iov.addr; + req->rl_send_iov[0].length = hdrlen; + req->rl_send_iov[0].lkey = req->rl_iov.lkey; + + req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); + req->rl_send_iov[1].length = rpclen; + req->rl_send_iov[1].lkey = req->rl_iov.lkey; + + req->rl_niovs = 2; + + if (padlen) { + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + + req->rl_send_iov[2].addr = ep->rep_pad.addr; + req->rl_send_iov[2].length = padlen; + req->rl_send_iov[2].lkey = ep->rep_pad.lkey; + + req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; + req->rl_send_iov[3].length = rqst->rq_slen - rpclen; + req->rl_send_iov[3].lkey = req->rl_iov.lkey; + + req->rl_niovs = 4; + } + + return 0; +} + +/* + * Chase down a received write or reply chunklist to get length + * RDMA'd by server. See map at rpcrdma_create_chunks()! :-) + */ +static int +rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp) +{ + unsigned int i, total_len; + struct rpcrdma_write_chunk *cur_wchunk; + + i = ntohl(**iptrp); /* get array count */ + if (i > max) + return -1; + cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); + total_len = 0; + while (i--) { + struct rpcrdma_segment *seg = &cur_wchunk->wc_target; + ifdebug(FACILITY) { + u64 off; + xdr_decode_hyper((u32 *)&seg->rs_offset, &off); + dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", + __func__, + ntohl(seg->rs_length), + off, + ntohl(seg->rs_handle)); + } + total_len += ntohl(seg->rs_length); + ++cur_wchunk; + } + /* check and adjust for properly terminated write chunk */ + if (wrchunk) { + u32 *w = (u32 *) cur_wchunk; + if (*w++ != xdr_zero) + return -1; + cur_wchunk = (struct rpcrdma_write_chunk *) w; + } + if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) + return -1; + + *iptrp = (u32 *) cur_wchunk; + return total_len; +} + +/* + * Scatter inline received data back into provided iov's. + */ +static void +rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) +{ + int i, npages, curlen, olen; + char *destp; + + curlen = rqst->rq_rcv_buf.head[0].iov_len; + if (curlen > copy_len) { /* write chunk header fixup */ + curlen = copy_len; + rqst->rq_rcv_buf.head[0].iov_len = curlen; + } + + dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", + __func__, srcp, copy_len, curlen); + + /* Shift pointer for first receive segment only */ + rqst->rq_rcv_buf.head[0].iov_base = srcp; + srcp += curlen; + copy_len -= curlen; + + olen = copy_len; + i = 0; + rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; + if (copy_len && rqst->rq_rcv_buf.page_len) { + npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + + rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; + for (; i < npages; i++) { + if (i == 0) + curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base; + else + curlen = PAGE_SIZE; + if (curlen > copy_len) + curlen = copy_len; + dprintk("RPC: %s: page %d" + " srcp 0x%p len %d curlen %d\n", + __func__, i, srcp, copy_len, curlen); + destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], + KM_SKB_SUNRPC_DATA); + if (i == 0) + memcpy(destp + rqst->rq_rcv_buf.page_base, + srcp, curlen); + else + memcpy(destp, srcp, curlen); + flush_dcache_page(rqst->rq_rcv_buf.pages[i]); + kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); + srcp += curlen; + copy_len -= curlen; + if (copy_len == 0) + break; + } + rqst->rq_rcv_buf.page_len = olen - copy_len; + } else + rqst->rq_rcv_buf.page_len = 0; + + if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { + curlen = copy_len; + if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) + curlen = rqst->rq_rcv_buf.tail[0].iov_len; + if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) + memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); + dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", + __func__, srcp, copy_len, curlen); + rqst->rq_rcv_buf.tail[0].iov_len = curlen; + copy_len -= curlen; ++i; + } else + rqst->rq_rcv_buf.tail[0].iov_len = 0; + + if (copy_len) + dprintk("RPC: %s: %d bytes in" + " %d extra segments (%d lost)\n", + __func__, olen, i, copy_len); + + /* TBD avoid a warning from call_decode() */ + rqst->rq_private_buf = rqst->rq_rcv_buf; +} + +/* + * This function is called when an async event is posted to + * the connection which changes the connection state. All it + * does at this point is mark the connection up/down, the rpc + * timers do the rest. + */ +void +rpcrdma_conn_func(struct rpcrdma_ep *ep) +{ + struct rpc_xprt *xprt = ep->rep_xprt; + + spin_lock_bh(&xprt->transport_lock); + if (ep->rep_connected > 0) { + if (!xprt_test_and_set_connected(xprt)) + xprt_wake_pending_tasks(xprt, 0); + } else { + if (xprt_test_and_clear_connected(xprt)) + xprt_wake_pending_tasks(xprt, ep->rep_connected); + } + spin_unlock_bh(&xprt->transport_lock); +} + +/* + * This function is called when memory window unbind which we are waiting + * for completes. Just use rr_func (zeroed by upcall) to signal completion. + */ +static void +rpcrdma_unbind_func(struct rpcrdma_rep *rep) +{ + wake_up(&rep->rr_unbind); +} + +/* + * Called as a tasklet to do req/reply match and complete a request + * Errors must result in the RPC task either being awakened, or + * allowed to timeout, to discover the errors at that time. + */ +void +rpcrdma_reply_handler(struct rpcrdma_rep *rep) +{ + struct rpcrdma_msg *headerp; + struct rpcrdma_req *req; + struct rpc_rqst *rqst; + struct rpc_xprt *xprt = rep->rr_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + u32 *iptr; + int i, rdmalen, status; + + /* Check status. If bad, signal disconnect and return rep to pool */ + if (rep->rr_len == ~0U) { + rpcrdma_recv_buffer_put(rep); + if (r_xprt->rx_ep.rep_connected == 1) { + r_xprt->rx_ep.rep_connected = -EIO; + rpcrdma_conn_func(&r_xprt->rx_ep); + } + return; + } + if (rep->rr_len < 28) { + dprintk("RPC: %s: short/invalid reply\n", __func__); + goto repost; + } + headerp = (struct rpcrdma_msg *) rep->rr_base; + if (headerp->rm_vers != xdr_one) { + dprintk("RPC: %s: invalid version %d\n", + __func__, ntohl(headerp->rm_vers)); + goto repost; + } + + /* Get XID and try for a match. */ + spin_lock(&xprt->transport_lock); + rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); + if (rqst == NULL) { + spin_unlock(&xprt->transport_lock); + dprintk("RPC: %s: reply 0x%p failed " + "to match any request xid 0x%08x len %d\n", + __func__, rep, headerp->rm_xid, rep->rr_len); +repost: + r_xprt->rx_stats.bad_reply_count++; + rep->rr_func = rpcrdma_reply_handler; + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + rpcrdma_recv_buffer_put(rep); + + return; + } + + /* get request object */ + req = rpcr_to_rdmar(rqst); + + dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" + " RPC request 0x%p xid 0x%08x\n", + __func__, rep, req, rqst, headerp->rm_xid); + + BUG_ON(!req || req->rl_reply); + + /* from here on, the reply is no longer an orphan */ + req->rl_reply = rep; + + /* check for expected message types */ + /* The order of some of these tests is important. */ + switch (headerp->rm_type) { + case __constant_htonl(RDMA_MSG): + /* never expect read chunks */ + /* never expect reply chunks (two ways to check) */ + /* never expect write chunks without having offered RDMA */ + if (headerp->rm_body.rm_chunks[0] != xdr_zero || + (headerp->rm_body.rm_chunks[1] == xdr_zero && + headerp->rm_body.rm_chunks[2] != xdr_zero) || + (headerp->rm_body.rm_chunks[1] != xdr_zero && + req->rl_nchunks == 0)) + goto badheader; + if (headerp->rm_body.rm_chunks[1] != xdr_zero) { + /* count any expected write chunks in read reply */ + /* start at write chunk array count */ + iptr = &headerp->rm_body.rm_chunks[2]; + rdmalen = rpcrdma_count_chunks(rep, + req->rl_nchunks, 1, &iptr); + /* check for validity, and no reply chunk after */ + if (rdmalen < 0 || *iptr++ != xdr_zero) + goto badheader; + rep->rr_len -= + ((unsigned char *)iptr - (unsigned char *)headerp); + status = rep->rr_len + rdmalen; + r_xprt->rx_stats.total_rdma_reply += rdmalen; + } else { + /* else ordinary inline */ + iptr = (u32 *)((unsigned char *)headerp + 28); + rep->rr_len -= 28; /*sizeof *headerp;*/ + status = rep->rr_len; + } + /* Fix up the rpc results for upper layer */ + rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); + break; + + case __constant_htonl(RDMA_NOMSG): + /* never expect read or write chunks, always reply chunks */ + if (headerp->rm_body.rm_chunks[0] != xdr_zero || + headerp->rm_body.rm_chunks[1] != xdr_zero || + headerp->rm_body.rm_chunks[2] != xdr_one || + req->rl_nchunks == 0) + goto badheader; + iptr = (u32 *)((unsigned char *)headerp + 28); + rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); + if (rdmalen < 0) + goto badheader; + r_xprt->rx_stats.total_rdma_reply += rdmalen; + /* Reply chunk buffer already is the reply vector - no fixup. */ + status = rdmalen; + break; + +badheader: + default: + dprintk("%s: invalid rpcrdma reply header (type %d):" + " chunks[012] == %d %d %d" + " expected chunks <= %d\n", + __func__, ntohl(headerp->rm_type), + headerp->rm_body.rm_chunks[0], + headerp->rm_body.rm_chunks[1], + headerp->rm_body.rm_chunks[2], + req->rl_nchunks); + status = -EIO; + r_xprt->rx_stats.bad_reply_count++; + break; + } + + /* If using mw bind, start the deregister process now. */ + /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ + if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS: + for (i = 0; req->rl_nchunks-- > 1;) + i += rpcrdma_deregister_external( + &req->rl_segments[i], r_xprt, NULL); + /* Optionally wait (not here) for unbinds to complete */ + rep->rr_func = rpcrdma_unbind_func; + (void) rpcrdma_deregister_external(&req->rl_segments[i], + r_xprt, rep); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + for (i = 0; req->rl_nchunks--;) + i += rpcrdma_deregister_external(&req->rl_segments[i], + r_xprt, NULL); + break; + default: + break; + } + + dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", + __func__, xprt, rqst, status); + xprt_complete_rqst(rqst->rq_task, status); + spin_unlock(&xprt->transport_lock); +} diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c new file mode 100644 index 000000000000..dc55cc974c90 --- /dev/null +++ b/net/sunrpc/xprtrdma/transport.c @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * transport.c + * + * This file contains the top-level implementation of an RPC RDMA + * transport. + * + * Naming convention: functions beginning with xprt_ are part of the + * transport switch. All others are RPC RDMA internal. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/seq_file.h> + +#include "xprt_rdma.h" + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +MODULE_LICENSE("Dual BSD/GPL"); + +MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS"); +MODULE_AUTHOR("Network Appliance, Inc."); + +/* + * tunables + */ + +static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; +static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_inline_write_padding; +#if !RPCRDMA_PERSISTENT_REGISTRATION +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ +#else +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; +#endif + +#ifdef RPC_DEBUG + +static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; +static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; +static unsigned int zero; +static unsigned int max_padding = PAGE_SIZE; +static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; +static unsigned int max_memreg = RPCRDMA_LAST - 1; + +static struct ctl_table_header *sunrpc_table_header; + +static ctl_table xr_tunables_table[] = { + { + .ctl_name = CTL_SLOTTABLE_RDMA, + .procname = "rdma_slot_table_entries", + .data = &xprt_rdma_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_RDMA_MAXINLINEREAD, + .procname = "rdma_max_inline_read", + .data = &xprt_rdma_max_inline_read, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_MAXINLINEWRITE, + .procname = "rdma_max_inline_write", + .data = &xprt_rdma_max_inline_write, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_WRITEPADDING, + .procname = "rdma_inline_write_padding", + .data = &xprt_rdma_inline_write_padding, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &max_padding, + }, + { + .ctl_name = CTL_RDMA_MEMREG, + .procname = "rdma_memreg_strategy", + .data = &xprt_rdma_memreg_strategy, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_memreg, + .extra2 = &max_memreg, + }, + { + .ctl_name = 0, + }, +}; + +static ctl_table sunrpc_table[] = { + { + .ctl_name = CTL_SUNRPC, + .procname = "sunrpc", + .mode = 0555, + .child = xr_tunables_table + }, + { + .ctl_name = 0, + }, +}; + +#endif + +static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ + +static void +xprt_rdma_format_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) + &rpcx_to_rdmad(xprt).addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%u", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), "rdma"); + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + /* netid */ + xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; +} + +static void +xprt_rdma_free_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); +} + +static void +xprt_rdma_connect_worker(struct work_struct *work) +{ + struct rpcrdma_xprt *r_xprt = + container_of(work, struct rpcrdma_xprt, rdma_connect.work); + struct rpc_xprt *xprt = &r_xprt->xprt; + int rc = 0; + + if (!xprt->shutdown) { + xprt_clear_connected(xprt); + + dprintk("RPC: %s: %sconnect\n", __func__, + r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); + rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + goto out; + } + goto out_clear; + +out: + xprt_wake_pending_tasks(xprt, rc); + +out_clear: + dprintk("RPC: %s: exit\n", __func__); + xprt_clear_connecting(xprt); +} + +/* + * xprt_rdma_destroy + * + * Destroy the xprt. + * Free all memory associated with the object, including its own. + * NOTE: none of the *destroy methods free memory for their top-level + * objects, even though they may have allocated it (they do free + * private memory). It's up to the caller to handle it. In this + * case (RDMA transport), all structure memory is inlined with the + * struct rpcrdma_xprt. + */ +static void +xprt_rdma_destroy(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int rc; + + dprintk("RPC: %s: called\n", __func__); + + cancel_delayed_work(&r_xprt->rdma_connect); + flush_scheduled_work(); + + xprt_clear_connected(xprt); + + rpcrdma_buffer_destroy(&r_xprt->rx_buf); + rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", + __func__, rc); + rpcrdma_ia_close(&r_xprt->rx_ia); + + xprt_rdma_free_addresses(xprt); + + kfree(xprt->slot); + xprt->slot = NULL; + kfree(xprt); + + dprintk("RPC: %s: returning\n", __func__); + + module_put(THIS_MODULE); +} + +/** + * xprt_setup_rdma - Set up transport to use RDMA + * + * @args: rpc transport arguments + */ +static struct rpc_xprt * +xprt_setup_rdma(struct xprt_create *args) +{ + struct rpcrdma_create_data_internal cdata; + struct rpc_xprt *xprt; + struct rpcrdma_xprt *new_xprt; + struct rpcrdma_ep *new_ep; + struct sockaddr_in *sin; + int rc; + + if (args->addrlen > sizeof(xprt->addr)) { + dprintk("RPC: %s: address too large\n", __func__); + return ERR_PTR(-EBADF); + } + + xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); + if (xprt == NULL) { + dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", + __func__); + return ERR_PTR(-ENOMEM); + } + + xprt->max_reqs = xprt_rdma_slot_table_entries; + xprt->slot = kcalloc(xprt->max_reqs, + sizeof(struct rpc_rqst), GFP_KERNEL); + if (xprt->slot == NULL) { + kfree(xprt); + dprintk("RPC: %s: couldn't allocate %d slots\n", + __func__, xprt->max_reqs); + return ERR_PTR(-ENOMEM); + } + + /* 60 second timeout, no retries */ + xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ); + xprt->bind_timeout = (60U * HZ); + xprt->connect_timeout = (60U * HZ); + xprt->reestablish_timeout = (5U * HZ); + xprt->idle_timeout = (5U * 60 * HZ); + + xprt->resvport = 0; /* privileged port not needed */ + xprt->tsh_size = 0; /* RPC-RDMA handles framing */ + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE; + xprt->ops = &xprt_rdma_procs; + + /* + * Set up RDMA-specific connect data. + */ + + /* Put server RDMA address in local cdata */ + memcpy(&cdata.addr, args->dstaddr, args->addrlen); + + /* Ensure xprt->addr holds valid server TCP (not RDMA) + * address, for any side protocols which peek at it */ + xprt->prot = IPPROTO_TCP; + xprt->addrlen = args->addrlen; + memcpy(&xprt->addr, &cdata.addr, xprt->addrlen); + + sin = (struct sockaddr_in *)&cdata.addr; + if (ntohs(sin->sin_port) != 0) + xprt_set_bound(xprt); + + dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__, + NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); + + /* Set max requests */ + cdata.max_requests = xprt->max_reqs; + + /* Set some length limits */ + cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ + cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ + + cdata.inline_wsize = xprt_rdma_max_inline_write; + if (cdata.inline_wsize > cdata.wsize) + cdata.inline_wsize = cdata.wsize; + + cdata.inline_rsize = xprt_rdma_max_inline_read; + if (cdata.inline_rsize > cdata.rsize) + cdata.inline_rsize = cdata.rsize; + + cdata.padding = xprt_rdma_inline_write_padding; + + /* + * Create new transport instance, which includes initialized + * o ia + * o endpoint + * o buffers + */ + + new_xprt = rpcx_to_rdmax(xprt); + + rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr, + xprt_rdma_memreg_strategy); + if (rc) + goto out1; + + /* + * initialize and create ep + */ + new_xprt->rx_data = cdata; + new_ep = &new_xprt->rx_ep; + new_ep->rep_remote_addr = cdata.addr; + + rc = rpcrdma_ep_create(&new_xprt->rx_ep, + &new_xprt->rx_ia, &new_xprt->rx_data); + if (rc) + goto out2; + + /* + * Allocate pre-registered send and receive buffers for headers and + * any inline data. Also specify any padding which will be provided + * from a preregistered zero buffer. + */ + rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, + &new_xprt->rx_data); + if (rc) + goto out3; + + /* + * Register a callback for connection events. This is necessary because + * connection loss notification is async. We also catch connection loss + * when reaping receives. + */ + INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); + new_ep->rep_func = rpcrdma_conn_func; + new_ep->rep_xprt = xprt; + + xprt_rdma_format_addresses(xprt); + + if (!try_module_get(THIS_MODULE)) + goto out4; + + return xprt; + +out4: + xprt_rdma_free_addresses(xprt); + rc = -EINVAL; +out3: + (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); +out2: + rpcrdma_ia_close(&new_xprt->rx_ia); +out1: + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(rc); +} + +/* + * Close a connection, during shutdown or timeout/reconnect + */ +static void +xprt_rdma_close(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + dprintk("RPC: %s: closing\n", __func__); + xprt_disconnect(xprt); + (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); +} + +static void +xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) +{ + struct sockaddr_in *sap; + + sap = (struct sockaddr_in *)&xprt->addr; + sap->sin_port = htons(port); + sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; + sap->sin_port = htons(port); + dprintk("RPC: %s: %u\n", __func__, port); +} + +static void +xprt_rdma_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + if (!xprt_test_and_set_connecting(xprt)) { + if (r_xprt->rx_ep.rep_connected != 0) { + /* Reconnect */ + schedule_delayed_work(&r_xprt->rdma_connect, + xprt->reestablish_timeout); + } else { + schedule_delayed_work(&r_xprt->rdma_connect, 0); + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); + } + } +} + +static int +xprt_rdma_reserve_xprt(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int credits = atomic_read(&r_xprt->rx_buf.rb_credits); + + /* == RPC_CWNDSCALE @ init, but *after* setup */ + if (r_xprt->rx_buf.rb_cwndscale == 0UL) { + r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; + dprintk("RPC: %s: cwndscale %lu\n", __func__, + r_xprt->rx_buf.rb_cwndscale); + BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); + } + xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; + return xprt_reserve_xprt_cong(task); +} + +/* + * The RDMA allocate/free functions need the task structure as a place + * to hide the struct rpcrdma_req, which is necessary for the actual send/recv + * sequence. For this reason, the recv buffers are attached to send + * buffers for portions of the RPC. Note that the RPC layer allocates + * both send and receive buffers in the same call. We may register + * the receive buffer portion when using reply chunks. + */ +static void * +xprt_rdma_allocate(struct rpc_task *task, size_t size) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req, *nreq; + + req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); + BUG_ON(NULL == req); + + if (size > req->rl_size) { + dprintk("RPC: %s: size %zd too large for buffer[%zd]: " + "prog %d vers %d proc %d\n", + __func__, size, req->rl_size, + task->tk_client->cl_prog, task->tk_client->cl_vers, + task->tk_msg.rpc_proc->p_proc); + /* + * Outgoing length shortage. Our inline write max must have + * been configured to perform direct i/o. + * + * This is therefore a large metadata operation, and the + * allocate call was made on the maximum possible message, + * e.g. containing long filename(s) or symlink data. In + * fact, while these metadata operations *might* carry + * large outgoing payloads, they rarely *do*. However, we + * have to commit to the request here, so reallocate and + * register it now. The data path will never require this + * reallocation. + * + * If the allocation or registration fails, the RPC framework + * will (doggedly) retry. + */ + if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == + RPCRDMA_BOUNCEBUFFERS) { + /* forced to "pure inline" */ + dprintk("RPC: %s: too much data (%zd) for inline " + "(r/w max %d/%d)\n", __func__, size, + rpcx_to_rdmad(xprt).inline_rsize, + rpcx_to_rdmad(xprt).inline_wsize); + size = req->rl_size; + rpc_exit(task, -EIO); /* fail the operation */ + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + goto out; + } + if (task->tk_flags & RPC_TASK_SWAPPER) + nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); + else + nreq = kmalloc(sizeof *req + size, GFP_NOFS); + if (nreq == NULL) + goto outfail; + + if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, + nreq->rl_base, size + sizeof(struct rpcrdma_req) + - offsetof(struct rpcrdma_req, rl_base), + &nreq->rl_handle, &nreq->rl_iov)) { + kfree(nreq); + goto outfail; + } + rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size; + nreq->rl_size = size; + nreq->rl_niovs = 0; + nreq->rl_nchunks = 0; + nreq->rl_buffer = (struct rpcrdma_buffer *)req; + nreq->rl_reply = req->rl_reply; + memcpy(nreq->rl_segments, + req->rl_segments, sizeof nreq->rl_segments); + /* flag the swap with an unused field */ + nreq->rl_iov.length = 0; + req->rl_reply = NULL; + req = nreq; + } + dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); +out: + return req->rl_xdr_buf; + +outfail: + rpcrdma_buffer_put(req); + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + return NULL; +} + +/* + * This function returns all RDMA resources to the pool. + */ +static void +xprt_rdma_free(void *buffer) +{ + struct rpcrdma_req *req; + struct rpcrdma_xprt *r_xprt; + struct rpcrdma_rep *rep; + int i; + + if (buffer == NULL) + return; + + req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); + r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); + rep = req->rl_reply; + + dprintk("RPC: %s: called on 0x%p%s\n", + __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); + + /* + * Finish the deregistration. When using mw bind, this was + * begun in rpcrdma_reply_handler(). In all other modes, we + * do it here, in thread context. The process is considered + * complete when the rr_func vector becomes NULL - this + * was put in place during rpcrdma_reply_handler() - the wait + * call below will not block if the dereg is "done". If + * interrupted, our framework will clean up. + */ + for (i = 0; req->rl_nchunks;) { + --req->rl_nchunks; + i += rpcrdma_deregister_external( + &req->rl_segments[i], r_xprt, NULL); + } + + if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { + rep->rr_func = NULL; /* abandon the callback */ + req->rl_reply = NULL; + } + + if (req->rl_iov.length == 0) { /* see allocate above */ + struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; + oreq->rl_reply = req->rl_reply; + (void) rpcrdma_deregister_internal(&r_xprt->rx_ia, + req->rl_handle, + &req->rl_iov); + kfree(req); + req = oreq; + } + + /* Put back request+reply buffers */ + rpcrdma_buffer_put(req); +} + +/* + * send_request invokes the meat of RPC RDMA. It must do the following: + * 1. Marshal the RPC request into an RPC RDMA request, which means + * putting a header in front of data, and creating IOVs for RDMA + * from those in the request. + * 2. In marshaling, detect opportunities for RDMA, and use them. + * 3. Post a recv message to set up asynch completion, then send + * the request (rpcrdma_ep_post). + * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). + */ + +static int +xprt_rdma_send_request(struct rpc_task *task) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + /* marshal the send itself */ + if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { + r_xprt->rx_stats.failed_marshal_count++; + dprintk("RPC: %s: rpcrdma_marshal_req failed\n", + __func__); + return -EIO; + } + + if (req->rl_reply == NULL) /* e.g. reconnection */ + rpcrdma_recv_buffer_get(req); + + if (req->rl_reply) { + req->rl_reply->rr_func = rpcrdma_reply_handler; + /* this need only be done once, but... */ + req->rl_reply->rr_xprt = xprt; + } + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { + xprt_disconnect(xprt); + return -ENOTCONN; /* implies disconnect */ + } + + rqst->rq_bytes_sent = 0; + return 0; +} + +static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, + "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu " + "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n", + + 0, /* need a local port? */ + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u, + + r_xprt->rx_stats.read_chunk_count, + r_xprt->rx_stats.write_chunk_count, + r_xprt->rx_stats.reply_chunk_count, + r_xprt->rx_stats.total_rdma_request, + r_xprt->rx_stats.total_rdma_reply, + r_xprt->rx_stats.pullup_copy_count, + r_xprt->rx_stats.fixup_copy_count, + r_xprt->rx_stats.hardway_register_count, + r_xprt->rx_stats.failed_marshal_count, + r_xprt->rx_stats.bad_reply_count); +} + +/* + * Plumbing for rpc transport switch and kernel module + */ + +static struct rpc_xprt_ops xprt_rdma_procs = { + .reserve_xprt = xprt_rdma_reserve_xprt, + .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .release_request = xprt_release_rqst_cong, /* ditto */ + .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ + .set_port = xprt_rdma_set_port, + .connect = xprt_rdma_connect, + .buf_alloc = xprt_rdma_allocate, + .buf_free = xprt_rdma_free, + .send_request = xprt_rdma_send_request, + .close = xprt_rdma_close, + .destroy = xprt_rdma_destroy, + .print_stats = xprt_rdma_print_stats +}; + +static struct xprt_class xprt_rdma = { + .list = LIST_HEAD_INIT(xprt_rdma.list), + .name = "rdma", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_RDMA, + .setup = xprt_setup_rdma, +}; + +static void __exit xprt_rdma_cleanup(void) +{ + int rc; + + dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); +#ifdef RPC_DEBUG + if (sunrpc_table_header) { + unregister_sysctl_table(sunrpc_table_header); + sunrpc_table_header = NULL; + } +#endif + rc = xprt_unregister_transport(&xprt_rdma); + if (rc) + dprintk("RPC: %s: xprt_unregister returned %i\n", + __func__, rc); +} + +static int __init xprt_rdma_init(void) +{ + int rc; + + rc = xprt_register_transport(&xprt_rdma); + + if (rc) + return rc; + + dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); + + dprintk(KERN_INFO "Defaults:\n"); + dprintk(KERN_INFO "\tSlots %d\n" + "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", + xprt_rdma_slot_table_entries, + xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); + dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", + xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); + +#ifdef RPC_DEBUG + if (!sunrpc_table_header) + sunrpc_table_header = register_sysctl_table(sunrpc_table); +#endif + return 0; +} + +module_init(xprt_rdma_init); +module_exit(xprt_rdma_cleanup); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c new file mode 100644 index 000000000000..9ec8ca4f6028 --- /dev/null +++ b/net/sunrpc/xprtrdma/verbs.c @@ -0,0 +1,1626 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * verbs.c + * + * Encapsulates the major functions managing: + * o adapters + * o endpoints + * o connections + * o buffer memory + */ + +#include <linux/pci.h> /* for Tavor hack below */ + +#include "xprt_rdma.h" + +/* + * Globals/Macros + */ + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +/* + * internal functions + */ + +/* + * handle replies in tasklet context, using a single, global list + * rdma tasklet function -- just turn around and call the func + * for all replies on the list + */ + +static DEFINE_SPINLOCK(rpcrdma_tk_lock_g); +static LIST_HEAD(rpcrdma_tasklets_g); + +static void +rpcrdma_run_tasklet(unsigned long data) +{ + struct rpcrdma_rep *rep; + void (*func)(struct rpcrdma_rep *); + unsigned long flags; + + data = data; + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + while (!list_empty(&rpcrdma_tasklets_g)) { + rep = list_entry(rpcrdma_tasklets_g.next, + struct rpcrdma_rep, rr_list); + list_del(&rep->rr_list); + func = rep->rr_func; + rep->rr_func = NULL; + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + + if (func) + func(rep); + else + rpcrdma_recv_buffer_put(rep); + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + } + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); +} + +static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); + +static inline void +rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep) +{ + unsigned long flags; + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g); + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + tasklet_schedule(&rpcrdma_tasklet_g); +} + +static void +rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) +{ + struct rpcrdma_ep *ep = context; + + dprintk("RPC: %s: QP error %X on device %s ep %p\n", + __func__, event->event, event->device->name, context); + if (ep->rep_connected == 1) { + ep->rep_connected = -EIO; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + } +} + +static void +rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) +{ + struct rpcrdma_ep *ep = context; + + dprintk("RPC: %s: CQ error %X on device %s ep %p\n", + __func__, event->event, event->device->name, context); + if (ep->rep_connected == 1) { + ep->rep_connected = -EIO; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + } +} + +static inline +void rpcrdma_event_process(struct ib_wc *wc) +{ + struct rpcrdma_rep *rep = + (struct rpcrdma_rep *)(unsigned long) wc->wr_id; + + dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", + __func__, rep, wc->status, wc->opcode, wc->byte_len); + + if (!rep) /* send or bind completion that we don't care about */ + return; + + if (IB_WC_SUCCESS != wc->status) { + dprintk("RPC: %s: %s WC status %X, connection lost\n", + __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", + wc->status); + rep->rr_len = ~0U; + rpcrdma_schedule_tasklet(rep); + return; + } + + switch (wc->opcode) { + case IB_WC_RECV: + rep->rr_len = wc->byte_len; + ib_dma_sync_single_for_cpu( + rdmab_to_ia(rep->rr_buffer)->ri_id->device, + rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); + /* Keep (only) the most recent credits, after check validity */ + if (rep->rr_len >= 16) { + struct rpcrdma_msg *p = + (struct rpcrdma_msg *) rep->rr_base; + unsigned int credits = ntohl(p->rm_credit); + if (credits == 0) { + dprintk("RPC: %s: server" + " dropped credits to 0!\n", __func__); + /* don't deadlock */ + credits = 1; + } else if (credits > rep->rr_buffer->rb_max_requests) { + dprintk("RPC: %s: server" + " over-crediting: %d (%d)\n", + __func__, credits, + rep->rr_buffer->rb_max_requests); + credits = rep->rr_buffer->rb_max_requests; + } + atomic_set(&rep->rr_buffer->rb_credits, credits); + } + /* fall through */ + case IB_WC_BIND_MW: + rpcrdma_schedule_tasklet(rep); + break; + default: + dprintk("RPC: %s: unexpected WC event %X\n", + __func__, wc->opcode); + break; + } +} + +static inline int +rpcrdma_cq_poll(struct ib_cq *cq) +{ + struct ib_wc wc; + int rc; + + for (;;) { + rc = ib_poll_cq(cq, 1, &wc); + if (rc < 0) { + dprintk("RPC: %s: ib_poll_cq failed %i\n", + __func__, rc); + return rc; + } + if (rc == 0) + break; + + rpcrdma_event_process(&wc); + } + + return 0; +} + +/* + * rpcrdma_cq_event_upcall + * + * This upcall handles recv, send, bind and unbind events. + * It is reentrant but processes single events in order to maintain + * ordering of receives to keep server credits. + * + * It is the responsibility of the scheduled tasklet to return + * recv buffers to the pool. NOTE: this affects synchronization of + * connection shutdown. That is, the structures required for + * the completion of the reply handler must remain intact until + * all memory has been reclaimed. + * + * Note that send events are suppressed and do not result in an upcall. + */ +static void +rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) +{ + int rc; + + rc = rpcrdma_cq_poll(cq); + if (rc) + return; + + rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed %i\n", + __func__, rc); + return; + } + + rpcrdma_cq_poll(cq); +} + +#ifdef RPC_DEBUG +static const char * const conn[] = { + "address resolved", + "address error", + "route resolved", + "route error", + "connect request", + "connect response", + "connect error", + "unreachable", + "rejected", + "established", + "disconnected", + "device removal" +}; +#endif + +static int +rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct rpcrdma_xprt *xprt = id->context; + struct rpcrdma_ia *ia = &xprt->rx_ia; + struct rpcrdma_ep *ep = &xprt->rx_ep; + struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; + struct ib_qp_attr attr; + struct ib_qp_init_attr iattr; + int connstate = 0; + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ADDR_ERROR: + ia->ri_async_rc = -EHOSTUNREACH; + dprintk("RPC: %s: CM address resolution error, ep 0x%p\n", + __func__, ep); + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ROUTE_ERROR: + ia->ri_async_rc = -ENETUNREACH; + dprintk("RPC: %s: CM route resolution error, ep 0x%p\n", + __func__, ep); + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ESTABLISHED: + connstate = 1; + ib_query_qp(ia->ri_id->qp, &attr, + IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, + &iattr); + dprintk("RPC: %s: %d responder resources" + " (%d initiator)\n", + __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); + goto connected; + case RDMA_CM_EVENT_CONNECT_ERROR: + connstate = -ENOTCONN; + goto connected; + case RDMA_CM_EVENT_UNREACHABLE: + connstate = -ENETDOWN; + goto connected; + case RDMA_CM_EVENT_REJECTED: + connstate = -ECONNREFUSED; + goto connected; + case RDMA_CM_EVENT_DISCONNECTED: + connstate = -ECONNABORTED; + goto connected; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + connstate = -ENODEV; +connected: + dprintk("RPC: %s: %s: %u.%u.%u.%u:%u" + " (ep 0x%p event 0x%x)\n", + __func__, + (event->event <= 11) ? conn[event->event] : + "unknown connection error", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + ep, event->event); + atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); + dprintk("RPC: %s: %sconnected\n", + __func__, connstate > 0 ? "" : "dis"); + ep->rep_connected = connstate; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + break; + default: + ia->ri_async_rc = -EINVAL; + dprintk("RPC: %s: unexpected CM event %X\n", + __func__, event->event); + complete(&ia->ri_done); + break; + } + + return 0; +} + +static struct rdma_cm_id * +rpcrdma_create_id(struct rpcrdma_xprt *xprt, + struct rpcrdma_ia *ia, struct sockaddr *addr) +{ + struct rdma_cm_id *id; + int rc; + + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + dprintk("RPC: %s: rdma_create_id() failed %i\n", + __func__, rc); + return id; + } + + ia->ri_async_rc = 0; + rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); + if (rc) { + dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", + __func__, rc); + goto out; + } + wait_for_completion(&ia->ri_done); + rc = ia->ri_async_rc; + if (rc) + goto out; + + ia->ri_async_rc = 0; + rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); + if (rc) { + dprintk("RPC: %s: rdma_resolve_route() failed %i\n", + __func__, rc); + goto out; + } + wait_for_completion(&ia->ri_done); + rc = ia->ri_async_rc; + if (rc) + goto out; + + return id; + +out: + rdma_destroy_id(id); + return ERR_PTR(rc); +} + +/* + * Drain any cq, prior to teardown. + */ +static void +rpcrdma_clean_cq(struct ib_cq *cq) +{ + struct ib_wc wc; + int count = 0; + + while (1 == ib_poll_cq(cq, 1, &wc)) + ++count; + + if (count) + dprintk("RPC: %s: flushed %d events (last 0x%x)\n", + __func__, count, wc.opcode); +} + +/* + * Exported functions. + */ + +/* + * Open and initialize an Interface Adapter. + * o initializes fields of struct rpcrdma_ia, including + * interface and provider attributes and protection zone. + */ +int +rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) +{ + int rc; + struct rpcrdma_ia *ia = &xprt->rx_ia; + + init_completion(&ia->ri_done); + + ia->ri_id = rpcrdma_create_id(xprt, ia, addr); + if (IS_ERR(ia->ri_id)) { + rc = PTR_ERR(ia->ri_id); + goto out1; + } + + ia->ri_pd = ib_alloc_pd(ia->ri_id->device); + if (IS_ERR(ia->ri_pd)) { + rc = PTR_ERR(ia->ri_pd); + dprintk("RPC: %s: ib_alloc_pd() failed %i\n", + __func__, rc); + goto out2; + } + + /* + * Optionally obtain an underlying physical identity mapping in + * order to do a memory window-based bind. This base registration + * is protected from remote access - that is enabled only by binding + * for the specific bytes targeted during each RPC operation, and + * revoked after the corresponding completion similar to a storage + * adapter. + */ + if (memreg > RPCRDMA_REGISTER) { + int mem_priv = IB_ACCESS_LOCAL_WRITE; + switch (memreg) { +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + mem_priv |= IB_ACCESS_REMOTE_WRITE; + mem_priv |= IB_ACCESS_REMOTE_READ; + break; +#endif + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + mem_priv |= IB_ACCESS_MW_BIND; + break; + default: + break; + } + ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); + if (IS_ERR(ia->ri_bind_mem)) { + printk(KERN_ALERT "%s: ib_get_dma_mr for " + "phys register failed with %lX\n\t" + "Will continue with degraded performance\n", + __func__, PTR_ERR(ia->ri_bind_mem)); + memreg = RPCRDMA_REGISTER; + ia->ri_bind_mem = NULL; + } + } + + /* Else will do memory reg/dereg for each chunk */ + ia->ri_memreg_strategy = memreg; + + return 0; +out2: + rdma_destroy_id(ia->ri_id); +out1: + return rc; +} + +/* + * Clean up/close an IA. + * o if event handles and PD have been initialized, free them. + * o close the IA + */ +void +rpcrdma_ia_close(struct rpcrdma_ia *ia) +{ + int rc; + + dprintk("RPC: %s: entering\n", __func__); + if (ia->ri_bind_mem != NULL) { + rc = ib_dereg_mr(ia->ri_bind_mem); + dprintk("RPC: %s: ib_dereg_mr returned %i\n", + __func__, rc); + } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) + rdma_destroy_qp(ia->ri_id); + if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { + rc = ib_dealloc_pd(ia->ri_pd); + dprintk("RPC: %s: ib_dealloc_pd returned %i\n", + __func__, rc); + } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) + rdma_destroy_id(ia->ri_id); +} + +/* + * Create unconnected endpoint. + */ +int +rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + struct rpcrdma_create_data_internal *cdata) +{ + struct ib_device_attr devattr; + int rc; + + rc = ib_query_device(ia->ri_id->device, &devattr); + if (rc) { + dprintk("RPC: %s: ib_query_device failed %d\n", + __func__, rc); + return rc; + } + + /* check provider's send/recv wr limits */ + if (cdata->max_requests > devattr.max_qp_wr) + cdata->max_requests = devattr.max_qp_wr; + + ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; + ep->rep_attr.qp_context = ep; + /* send_cq and recv_cq initialized below */ + ep->rep_attr.srq = NULL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + /* Add room for mw_binds+unbinds - overkill! */ + ep->rep_attr.cap.max_send_wr++; + ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) + return -EINVAL; + break; + default: + break; + } + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); + ep->rep_attr.cap.max_recv_sge = 1; + ep->rep_attr.cap.max_inline_data = 0; + ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + ep->rep_attr.qp_type = IB_QPT_RC; + ep->rep_attr.port_num = ~0; + + dprintk("RPC: %s: requested max: dtos: send %d recv %d; " + "iovs: send %d recv %d\n", + __func__, + ep->rep_attr.cap.max_send_wr, + ep->rep_attr.cap.max_recv_wr, + ep->rep_attr.cap.max_send_sge, + ep->rep_attr.cap.max_recv_sge); + + /* set trigger for requesting send completion */ + ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + ep->rep_cqinit -= RPCRDMA_MAX_SEGS; + break; + default: + break; + } + if (ep->rep_cqinit <= 2) + ep->rep_cqinit = 0; + INIT_CQCOUNT(ep); + ep->rep_ia = ia; + init_waitqueue_head(&ep->rep_connect_wait); + + /* + * Create a single cq for receive dto and mw_bind (only ever + * care about unbind, really). Send completions are suppressed. + * Use single threaded tasklet upcalls to maintain ordering. + */ + ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, + rpcrdma_cq_async_error_upcall, NULL, + ep->rep_attr.cap.max_recv_wr + + ep->rep_attr.cap.max_send_wr + 1, 0); + if (IS_ERR(ep->rep_cq)) { + rc = PTR_ERR(ep->rep_cq); + dprintk("RPC: %s: ib_create_cq failed: %i\n", + __func__, rc); + goto out1; + } + + rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + goto out2; + } + + ep->rep_attr.send_cq = ep->rep_cq; + ep->rep_attr.recv_cq = ep->rep_cq; + + /* Initialize cma parameters */ + + /* RPC/RDMA does not use private data */ + ep->rep_remote_cma.private_data = NULL; + ep->rep_remote_cma.private_data_len = 0; + + /* Client offers RDMA Read but does not initiate */ + switch (ia->ri_memreg_strategy) { + case RPCRDMA_BOUNCEBUFFERS: + ep->rep_remote_cma.responder_resources = 0; + break; + case RPCRDMA_MTHCAFMR: + case RPCRDMA_REGISTER: + ep->rep_remote_cma.responder_resources = cdata->max_requests * + (RPCRDMA_MAX_DATA_SEGS / 8); + break; + case RPCRDMA_MEMWINDOWS: + case RPCRDMA_MEMWINDOWS_ASYNC: +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: +#endif + ep->rep_remote_cma.responder_resources = cdata->max_requests * + (RPCRDMA_MAX_DATA_SEGS / 2); + break; + default: + break; + } + if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) + ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; + ep->rep_remote_cma.initiator_depth = 0; + + ep->rep_remote_cma.retry_count = 7; + ep->rep_remote_cma.flow_control = 0; + ep->rep_remote_cma.rnr_retry_count = 0; + + return 0; + +out2: + if (ib_destroy_cq(ep->rep_cq)) + ; +out1: + return rc; +} + +/* + * rpcrdma_ep_destroy + * + * Disconnect and destroy endpoint. After this, the only + * valid operations on the ep are to free it (if dynamically + * allocated) or re-create it. + * + * The caller's error handling must be sure to not leak the endpoint + * if this function fails. + */ +int +rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + int rc; + + dprintk("RPC: %s: entering, connected is %d\n", + __func__, ep->rep_connected); + + if (ia->ri_id->qp) { + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc) + dprintk("RPC: %s: rpcrdma_ep_disconnect" + " returned %i\n", __func__, rc); + } + + ep->rep_func = NULL; + + /* padding - could be done in rpcrdma_buffer_destroy... */ + if (ep->rep_pad_mr) { + rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); + ep->rep_pad_mr = NULL; + } + + if (ia->ri_id->qp) { + rdma_destroy_qp(ia->ri_id); + ia->ri_id->qp = NULL; + } + + rpcrdma_clean_cq(ep->rep_cq); + rc = ib_destroy_cq(ep->rep_cq); + if (rc) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + __func__, rc); + + return rc; +} + +/* + * Connect unconnected endpoint. + */ +int +rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + struct rdma_cm_id *id; + int rc = 0; + int retry_count = 0; + int reconnect = (ep->rep_connected != 0); + + if (reconnect) { + struct rpcrdma_xprt *xprt; +retry: + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc && rc != -ENOTCONN) + dprintk("RPC: %s: rpcrdma_ep_disconnect" + " status %i\n", __func__, rc); + rpcrdma_clean_cq(ep->rep_cq); + + xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); + id = rpcrdma_create_id(xprt, ia, + (struct sockaddr *)&xprt->rx_data.addr); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + goto out; + } + /* TEMP TEMP TEMP - fail if new device: + * Deregister/remarshal *all* requests! + * Close and recreate adapter, pd, etc! + * Re-determine all attributes still sane! + * More stuff I haven't thought of! + * Rrrgh! + */ + if (ia->ri_id->device != id->device) { + printk("RPC: %s: can't reconnect on " + "different device!\n", __func__); + rdma_destroy_id(id); + rc = -ENETDOWN; + goto out; + } + /* END TEMP */ + rdma_destroy_id(ia->ri_id); + ia->ri_id = id; + } + + rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); + if (rc) { + dprintk("RPC: %s: rdma_create_qp failed %i\n", + __func__, rc); + goto out; + } + +/* XXX Tavor device performs badly with 2K MTU! */ +if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { + struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); + if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && + (pcid->vendor == PCI_VENDOR_ID_MELLANOX || + pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { + struct ib_qp_attr attr = { + .path_mtu = IB_MTU_1024 + }; + rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); + } +} + + /* Theoretically a client initiator_depth > 0 is not needed, + * but many peers fail to complete the connection unless they + * == responder_resources! */ + if (ep->rep_remote_cma.initiator_depth != + ep->rep_remote_cma.responder_resources) + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; + + ep->rep_connected = 0; + + rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); + if (rc) { + dprintk("RPC: %s: rdma_connect() failed with %i\n", + __func__, rc); + goto out; + } + + if (reconnect) + return 0; + + wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); + + /* + * Check state. A non-peer reject indicates no listener + * (ECONNREFUSED), which may be a transient state. All + * others indicate a transport condition which has already + * undergone a best-effort. + */ + if (ep->rep_connected == -ECONNREFUSED + && ++retry_count <= RDMA_CONNECT_RETRY_MAX) { + dprintk("RPC: %s: non-peer_reject, retry\n", __func__); + goto retry; + } + if (ep->rep_connected <= 0) { + /* Sometimes, the only way to reliably connect to remote + * CMs is to use same nonzero values for ORD and IRD. */ + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; + if (ep->rep_remote_cma.initiator_depth == 0) + ++ep->rep_remote_cma.initiator_depth; + if (ep->rep_remote_cma.responder_resources == 0) + ++ep->rep_remote_cma.responder_resources; + if (retry_count++ == 0) + goto retry; + rc = ep->rep_connected; + } else { + dprintk("RPC: %s: connected\n", __func__); + } + +out: + if (rc) + ep->rep_connected = rc; + return rc; +} + +/* + * rpcrdma_ep_disconnect + * + * This is separate from destroy to facilitate the ability + * to reconnect without recreating the endpoint. + * + * This call is not reentrant, and must not be made in parallel + * on the same endpoint. + */ +int +rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + int rc; + + rpcrdma_clean_cq(ep->rep_cq); + rc = rdma_disconnect(ia->ri_id); + if (!rc) { + /* returns without wait if not connected */ + wait_event_interruptible(ep->rep_connect_wait, + ep->rep_connected != 1); + dprintk("RPC: %s: after wait, %sconnected\n", __func__, + (ep->rep_connected == 1) ? "still " : "dis"); + } else { + dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); + ep->rep_connected = rc; + } + return rc; +} + +/* + * Initialize buffer memory + */ +int +rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) +{ + char *p; + size_t len; + int i, rc; + + buf->rb_max_requests = cdata->max_requests; + spin_lock_init(&buf->rb_lock); + atomic_set(&buf->rb_credits, 1); + + /* Need to allocate: + * 1. arrays for send and recv pointers + * 2. arrays of struct rpcrdma_req to fill in pointers + * 3. array of struct rpcrdma_rep for replies + * 4. padding, if any + * 5. mw's, if any + * Send/recv buffers in req/rep need to be registered + */ + + len = buf->rb_max_requests * + (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); + len += cdata->padding; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + /* TBD we are perhaps overallocating here */ + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; + default: + break; + } + + /* allocate 1, 4 and 5 in one shot */ + p = kzalloc(len, GFP_KERNEL); + if (p == NULL) { + dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", + __func__, len); + rc = -ENOMEM; + goto out; + } + buf->rb_pool = p; /* for freeing it later */ + + buf->rb_send_bufs = (struct rpcrdma_req **) p; + p = (char *) &buf->rb_send_bufs[buf->rb_max_requests]; + buf->rb_recv_bufs = (struct rpcrdma_rep **) p; + p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; + + /* + * Register the zeroed pad buffer, if any. + */ + if (cdata->padding) { + rc = rpcrdma_register_internal(ia, p, cdata->padding, + &ep->rep_pad_mr, &ep->rep_pad); + if (rc) + goto out; + } + p += cdata->padding; + + /* + * Allocate the fmr's, or mw's for mw_bind chunk registration. + * We "cycle" the mw's in order to minimize rkey reuse, + * and also reduce unbind-to-bind collision. + */ + INIT_LIST_HEAD(&buf->rb_mws); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + { + struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; + struct ib_fmr_attr fa = { + RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT + }; + /* TBD we are perhaps overallocating here */ + for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + r->r.fmr = ib_alloc_fmr(ia->ri_pd, + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, + &fa); + if (IS_ERR(r->r.fmr)) { + rc = PTR_ERR(r->r.fmr); + dprintk("RPC: %s: ib_alloc_fmr" + " failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + } + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; + /* Allocate one extra request's worth, for full cycling */ + for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + r->r.mw = ib_alloc_mw(ia->ri_pd); + if (IS_ERR(r->r.mw)) { + rc = PTR_ERR(r->r.mw); + dprintk("RPC: %s: ib_alloc_mw" + " failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + } + break; + default: + break; + } + + /* + * Allocate/init the request/reply buffers. Doing this + * using kmalloc for now -- one for each buf. + */ + for (i = 0; i < buf->rb_max_requests; i++) { + struct rpcrdma_req *req; + struct rpcrdma_rep *rep; + + len = cdata->inline_wsize + sizeof(struct rpcrdma_req); + /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */ + /* Typical ~2400b, so rounding up saves work later */ + if (len < 4096) + len = 4096; + req = kmalloc(len, GFP_KERNEL); + if (req == NULL) { + dprintk("RPC: %s: request buffer %d alloc" + " failed\n", __func__, i); + rc = -ENOMEM; + goto out; + } + memset(req, 0, sizeof(struct rpcrdma_req)); + buf->rb_send_bufs[i] = req; + buf->rb_send_bufs[i]->rl_buffer = buf; + + rc = rpcrdma_register_internal(ia, req->rl_base, + len - offsetof(struct rpcrdma_req, rl_base), + &buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); + if (rc) + goto out; + + buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); + + len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); + rep = kmalloc(len, GFP_KERNEL); + if (rep == NULL) { + dprintk("RPC: %s: reply buffer %d alloc failed\n", + __func__, i); + rc = -ENOMEM; + goto out; + } + memset(rep, 0, sizeof(struct rpcrdma_rep)); + buf->rb_recv_bufs[i] = rep; + buf->rb_recv_bufs[i]->rr_buffer = buf; + init_waitqueue_head(&rep->rr_unbind); + + rc = rpcrdma_register_internal(ia, rep->rr_base, + len - offsetof(struct rpcrdma_rep, rr_base), + &buf->rb_recv_bufs[i]->rr_handle, + &buf->rb_recv_bufs[i]->rr_iov); + if (rc) + goto out; + + } + dprintk("RPC: %s: max_requests %d\n", + __func__, buf->rb_max_requests); + /* done */ + return 0; +out: + rpcrdma_buffer_destroy(buf); + return rc; +} + +/* + * Unregister and destroy buffer memory. Need to deal with + * partial initialization, so it's callable from failed create. + * Must be called before destroying endpoint, as registrations + * reference it. + */ +void +rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) +{ + int rc, i; + struct rpcrdma_ia *ia = rdmab_to_ia(buf); + + /* clean up in reverse order from create + * 1. recv mr memory (mr free, then kfree) + * 1a. bind mw memory + * 2. send mr memory (mr free, then kfree) + * 3. padding (if any) [moved to rpcrdma_ep_destroy] + * 4. arrays + */ + dprintk("RPC: %s: entering\n", __func__); + + for (i = 0; i < buf->rb_max_requests; i++) { + if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { + rpcrdma_deregister_internal(ia, + buf->rb_recv_bufs[i]->rr_handle, + &buf->rb_recv_bufs[i]->rr_iov); + kfree(buf->rb_recv_bufs[i]); + } + if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { + while (!list_empty(&buf->rb_mws)) { + struct rpcrdma_mw *r; + r = list_entry(buf->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + rc = ib_dealloc_fmr(r->r.fmr); + if (rc) + dprintk("RPC: %s:" + " ib_dealloc_fmr" + " failed %i\n", + __func__, rc); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + rc = ib_dealloc_mw(r->r.mw); + if (rc) + dprintk("RPC: %s:" + " ib_dealloc_mw" + " failed %i\n", + __func__, rc); + break; + default: + break; + } + } + rpcrdma_deregister_internal(ia, + buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); + kfree(buf->rb_send_bufs[i]); + } + } + + kfree(buf->rb_pool); +} + +/* + * Get a set of request/reply buffers. + * + * Reply buffer (if needed) is attached to send buffer upon return. + * Rule: + * rb_send_index and rb_recv_index MUST always be pointing to the + * *next* available buffer (non-NULL). They are incremented after + * removing buffers, and decremented *before* returning them. + */ +struct rpcrdma_req * +rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) +{ + struct rpcrdma_req *req; + unsigned long flags; + + spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_send_index == buffers->rb_max_requests) { + spin_unlock_irqrestore(&buffers->rb_lock, flags); + dprintk("RPC: %s: out of request buffers\n", __func__); + return ((struct rpcrdma_req *)NULL); + } + + req = buffers->rb_send_bufs[buffers->rb_send_index]; + if (buffers->rb_send_index < buffers->rb_recv_index) { + dprintk("RPC: %s: %d extra receives outstanding (ok)\n", + __func__, + buffers->rb_recv_index - buffers->rb_send_index); + req->rl_reply = NULL; + } else { + req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; + buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; + } + buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; + if (!list_empty(&buffers->rb_mws)) { + int i = RPCRDMA_MAX_SEGS - 1; + do { + struct rpcrdma_mw *r; + r = list_entry(buffers->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + req->rl_segments[i].mr_chunk.rl_mw = r; + } while (--i >= 0); + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); + return req; +} + +/* + * Put request/reply buffers back into pool. + * Pre-decrement counter/array index. + */ +void +rpcrdma_buffer_put(struct rpcrdma_req *req) +{ + struct rpcrdma_buffer *buffers = req->rl_buffer; + struct rpcrdma_ia *ia = rdmab_to_ia(buffers); + int i; + unsigned long flags; + + BUG_ON(req->rl_nchunks != 0); + spin_lock_irqsave(&buffers->rb_lock, flags); + buffers->rb_send_bufs[--buffers->rb_send_index] = req; + req->rl_niovs = 0; + if (req->rl_reply) { + buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; + init_waitqueue_head(&req->rl_reply->rr_unbind); + req->rl_reply->rr_func = NULL; + req->rl_reply = NULL; + } + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + /* + * Cycle mw's back in reverse order, and "spin" them. + * This delays and scrambles reuse as much as possible. + */ + i = 1; + do { + struct rpcrdma_mw **mw; + mw = &req->rl_segments[i].mr_chunk.rl_mw; + list_add_tail(&(*mw)->mw_list, &buffers->rb_mws); + *mw = NULL; + } while (++i < RPCRDMA_MAX_SEGS); + list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list, + &buffers->rb_mws); + req->rl_segments[0].mr_chunk.rl_mw = NULL; + break; + default: + break; + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Recover reply buffers from pool. + * This happens when recovering from error conditions. + * Post-increment counter/array index. + */ +void +rpcrdma_recv_buffer_get(struct rpcrdma_req *req) +{ + struct rpcrdma_buffer *buffers = req->rl_buffer; + unsigned long flags; + + if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */ + buffers = ((struct rpcrdma_req *) buffers)->rl_buffer; + spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_recv_index < buffers->rb_max_requests) { + req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; + buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Put reply buffers back into pool when not attached to + * request. This happens in error conditions, and when + * aborting unbinds. Pre-decrement counter/array index. + */ +void +rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) +{ + struct rpcrdma_buffer *buffers = rep->rr_buffer; + unsigned long flags; + + rep->rr_func = NULL; + spin_lock_irqsave(&buffers->rb_lock, flags); + buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Wrappers for internal-use kmalloc memory registration, used by buffer code. + */ + +int +rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, + struct ib_mr **mrp, struct ib_sge *iov) +{ + struct ib_phys_buf ipb; + struct ib_mr *mr; + int rc; + + /* + * All memory passed here was kmalloc'ed, therefore phys-contiguous. + */ + iov->addr = ib_dma_map_single(ia->ri_id->device, + va, len, DMA_BIDIRECTIONAL); + iov->length = len; + + if (ia->ri_bind_mem != NULL) { + *mrp = NULL; + iov->lkey = ia->ri_bind_mem->lkey; + return 0; + } + + ipb.addr = iov->addr; + ipb.size = iov->length; + mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1, + IB_ACCESS_LOCAL_WRITE, &iov->addr); + + dprintk("RPC: %s: phys convert: 0x%llx " + "registered 0x%llx length %d\n", + __func__, ipb.addr, iov->addr, len); + + if (IS_ERR(mr)) { + *mrp = NULL; + rc = PTR_ERR(mr); + dprintk("RPC: %s: failed with %i\n", __func__, rc); + } else { + *mrp = mr; + iov->lkey = mr->lkey; + rc = 0; + } + + return rc; +} + +int +rpcrdma_deregister_internal(struct rpcrdma_ia *ia, + struct ib_mr *mr, struct ib_sge *iov) +{ + int rc; + + ib_dma_unmap_single(ia->ri_id->device, + iov->addr, iov->length, DMA_BIDIRECTIONAL); + + if (NULL == mr) + return 0; + + rc = ib_dereg_mr(mr); + if (rc) + dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc); + return rc; +} + +/* + * Wrappers for chunk registration, shared by read/write chunk code. + */ + +static void +rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) +{ + seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + seg->mr_dmalen = seg->mr_len; + if (seg->mr_page) + seg->mr_dma = ib_dma_map_page(ia->ri_id->device, + seg->mr_page, offset_in_page(seg->mr_offset), + seg->mr_dmalen, seg->mr_dir); + else + seg->mr_dma = ib_dma_map_single(ia->ri_id->device, + seg->mr_offset, + seg->mr_dmalen, seg->mr_dir); +} + +static void +rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) +{ + if (seg->mr_page) + ib_dma_unmap_page(ia->ri_id->device, + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); + else + ib_dma_unmap_single(ia->ri_id->device, + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); +} + +int +rpcrdma_register_external(struct rpcrdma_mr_seg *seg, + int nsegs, int writing, struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : + IB_ACCESS_REMOTE_READ); + struct rpcrdma_mr_seg *seg1 = seg; + int i; + int rc = 0; + + switch (ia->ri_memreg_strategy) { + +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + rpcrdma_map_one(ia, seg, writing); + seg->mr_rkey = ia->ri_bind_mem->rkey; + seg->mr_base = seg->mr_dma; + seg->mr_nsegs = 1; + nsegs = 1; + break; +#endif + + /* Registration using fast memory registration */ + case RPCRDMA_MTHCAFMR: + { + u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; + int len, pageoff = offset_in_page(seg->mr_offset); + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; + if (nsegs > RPCRDMA_MAX_DATA_SEGS) + nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < nsegs;) { + rpcrdma_map_one(ia, seg, writing); + physaddrs[i] = seg->mr_dma; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + nsegs = i; + rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, + physaddrs, nsegs, seg1->mr_dma); + if (rc) { + dprintk("RPC: %s: failed ib_map_phys_fmr " + "%u@0x%llx+%i (%d)... status %i\n", __func__, + len, (unsigned long long)seg1->mr_dma, + pageoff, nsegs, rc); + while (nsegs--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_base = seg1->mr_dma + pageoff; + seg1->mr_nsegs = nsegs; + seg1->mr_len = len; + } + } + break; + + /* Registration using memory windows */ + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct ib_mw_bind param; + rpcrdma_map_one(ia, seg, writing); + param.mr = ia->ri_bind_mem; + param.wr_id = 0ULL; /* no send cookie */ + param.addr = seg->mr_dma; + param.length = seg->mr_len; + param.send_flags = 0; + param.mw_access_flags = mem_priv; + + DECR_CQCOUNT(&r_xprt->rx_ep); + rc = ib_bind_mw(ia->ri_id->qp, + seg->mr_chunk.rl_mw->r.mw, ¶m); + if (rc) { + dprintk("RPC: %s: failed ib_bind_mw " + "%u@0x%llx status %i\n", + __func__, seg->mr_len, + (unsigned long long)seg->mr_dma, rc); + rpcrdma_unmap_one(ia, seg); + } else { + seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; + seg->mr_base = param.addr; + seg->mr_nsegs = 1; + nsegs = 1; + } + } + break; + + /* Default registration each time */ + default: + { + struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; + int len = 0; + if (nsegs > RPCRDMA_MAX_DATA_SEGS) + nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < nsegs;) { + rpcrdma_map_one(ia, seg, writing); + ipb[i].addr = seg->mr_dma; + ipb[i].size = seg->mr_len; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + nsegs = i; + seg1->mr_base = seg1->mr_dma; + seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, + ipb, nsegs, mem_priv, &seg1->mr_base); + if (IS_ERR(seg1->mr_chunk.rl_mr)) { + rc = PTR_ERR(seg1->mr_chunk.rl_mr); + dprintk("RPC: %s: failed ib_reg_phys_mr " + "%u@0x%llx (%d)... status %i\n", + __func__, len, + (unsigned long long)seg1->mr_dma, nsegs, rc); + while (nsegs--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; + seg1->mr_nsegs = nsegs; + seg1->mr_len = len; + } + } + break; + } + if (rc) + return -1; + + return nsegs; +} + +int +rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_xprt *r_xprt, void *r) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_mr_seg *seg1 = seg; + int nsegs = seg->mr_nsegs, rc; + + switch (ia->ri_memreg_strategy) { + +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + BUG_ON(nsegs != 1); + rpcrdma_unmap_one(ia, seg); + rc = 0; + break; +#endif + + case RPCRDMA_MTHCAFMR: + { + LIST_HEAD(l); + list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + } + if (rc) + dprintk("RPC: %s: failed ib_unmap_fmr," + " status %i\n", __func__, rc); + break; + + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct ib_mw_bind param; + BUG_ON(nsegs != 1); + param.mr = ia->ri_bind_mem; + param.addr = 0ULL; /* unbind */ + param.length = 0; + param.mw_access_flags = 0; + if (r) { + param.wr_id = (u64) (unsigned long) r; + param.send_flags = IB_SEND_SIGNALED; + INIT_CQCOUNT(&r_xprt->rx_ep); + } else { + param.wr_id = 0ULL; + param.send_flags = 0; + DECR_CQCOUNT(&r_xprt->rx_ep); + } + rc = ib_bind_mw(ia->ri_id->qp, + seg->mr_chunk.rl_mw->r.mw, ¶m); + rpcrdma_unmap_one(ia, seg); + } + if (rc) + dprintk("RPC: %s: failed ib_(un)bind_mw," + " status %i\n", __func__, rc); + else + r = NULL; /* will upcall on completion */ + break; + + default: + rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); + seg1->mr_chunk.rl_mr = NULL; + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + if (rc) + dprintk("RPC: %s: failed ib_dereg_mr," + " status %i\n", __func__, rc); + break; + } + if (r) { + struct rpcrdma_rep *rep = r; + void (*func)(struct rpcrdma_rep *) = rep->rr_func; + rep->rr_func = NULL; + func(rep); /* dereg done, callback now */ + } + return nsegs; +} + +/* + * Prepost any receive buffer, then post send. + * + * Receive buffer is donated to hardware, reclaimed upon recv completion. + */ +int +rpcrdma_ep_post(struct rpcrdma_ia *ia, + struct rpcrdma_ep *ep, + struct rpcrdma_req *req) +{ + struct ib_send_wr send_wr, *send_wr_fail; + struct rpcrdma_rep *rep = req->rl_reply; + int rc; + + if (rep) { + rc = rpcrdma_ep_post_recv(ia, ep, rep); + if (rc) + goto out; + req->rl_reply = NULL; + } + + send_wr.next = NULL; + send_wr.wr_id = 0ULL; /* no send cookie */ + send_wr.sg_list = req->rl_send_iov; + send_wr.num_sge = req->rl_niovs; + send_wr.opcode = IB_WR_SEND; + send_wr.imm_data = 0; + if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[3].addr, req->rl_send_iov[3].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[1].addr, req->rl_send_iov[1].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[0].addr, req->rl_send_iov[0].length, + DMA_TO_DEVICE); + + if (DECR_CQCOUNT(ep) > 0) + send_wr.send_flags = 0; + else { /* Provider must take a send completion every now and then */ + INIT_CQCOUNT(ep); + send_wr.send_flags = IB_SEND_SIGNALED; + } + + rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); + if (rc) + dprintk("RPC: %s: ib_post_send returned %i\n", __func__, + rc); +out: + return rc; +} + +/* + * (Re)post a receive buffer. + */ +int +rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, + struct rpcrdma_ep *ep, + struct rpcrdma_rep *rep) +{ + struct ib_recv_wr recv_wr, *recv_wr_fail; + int rc; + + recv_wr.next = NULL; + recv_wr.wr_id = (u64) (unsigned long) rep; + recv_wr.sg_list = &rep->rr_iov; + recv_wr.num_sge = 1; + + ib_dma_sync_single_for_cpu(ia->ri_id->device, + rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + + DECR_CQCOUNT(ep); + rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); + + if (rc) + dprintk("RPC: %s: ib_post_recv returned %i\n", __func__, + rc); + return rc; +} diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h new file mode 100644 index 000000000000..2427822f8bd4 --- /dev/null +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_XPRT_RDMA_H +#define _LINUX_SUNRPC_XPRT_RDMA_H + +#include <linux/wait.h> /* wait_queue_head_t, etc */ +#include <linux/spinlock.h> /* spinlock_t, etc */ +#include <asm/atomic.h> /* atomic_t, etc */ + +#include <rdma/rdma_cm.h> /* RDMA connection api */ +#include <rdma/ib_verbs.h> /* RDMA verbs api */ + +#include <linux/sunrpc/clnt.h> /* rpc_xprt */ +#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ +#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ + +/* + * Interface Adapter -- one per transport instance + */ +struct rpcrdma_ia { + struct rdma_cm_id *ri_id; + struct ib_pd *ri_pd; + struct ib_mr *ri_bind_mem; + struct completion ri_done; + int ri_async_rc; + enum rpcrdma_memreg ri_memreg_strategy; +}; + +/* + * RDMA Endpoint -- one per transport instance + */ + +struct rpcrdma_ep { + atomic_t rep_cqcount; + int rep_cqinit; + int rep_connected; + struct rpcrdma_ia *rep_ia; + struct ib_cq *rep_cq; + struct ib_qp_init_attr rep_attr; + wait_queue_head_t rep_connect_wait; + struct ib_sge rep_pad; /* holds zeroed pad */ + struct ib_mr *rep_pad_mr; /* holds zeroed pad */ + void (*rep_func)(struct rpcrdma_ep *); + struct rpc_xprt *rep_xprt; /* for rep_func */ + struct rdma_conn_param rep_remote_cma; + struct sockaddr_storage rep_remote_addr; +}; + +#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) +#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) + +/* + * struct rpcrdma_rep -- this structure encapsulates state required to recv + * and complete a reply, asychronously. It needs several pieces of + * state: + * o recv buffer (posted to provider) + * o ib_sge (also donated to provider) + * o status of reply (length, success or not) + * o bookkeeping state to get run by tasklet (list, etc) + * + * These are allocated during initialization, per-transport instance; + * however, the tasklet execution list itself is global, as it should + * always be pretty short. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests. + */ + +/* temporary static scatter/gather max */ +#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ +#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ +#define MAX_RPCRDMAHDR (\ + /* max supported RPC/RDMA header */ \ + sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ + (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) + +struct rpcrdma_buffer; + +struct rpcrdma_rep { + unsigned int rr_len; /* actual received reply length */ + struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ + struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ + void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ + struct list_head rr_list; /* tasklet list */ + wait_queue_head_t rr_unbind; /* optional unbind wait */ + struct ib_sge rr_iov; /* for posting */ + struct ib_mr *rr_handle; /* handle for mem in rr_iov */ + char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ +}; + +/* + * struct rpcrdma_req -- structure central to the request/reply sequence. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests. + * + * It includes pre-registered buffer memory for send AND recv. + * The recv buffer, however, is not owned by this structure, and + * is "donated" to the hardware when a recv is posted. When a + * reply is handled, the recv buffer used is given back to the + * struct rpcrdma_req associated with the request. + * + * In addition to the basic memory, this structure includes an array + * of iovs for send operations. The reason is that the iovs passed to + * ib_post_{send,recv} must not be modified until the work request + * completes. + * + * NOTES: + * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we + * marshal. The number needed varies depending on the iov lists that + * are passed to us, the memory registration mode we are in, and if + * physical addressing is used, the layout. + */ + +struct rpcrdma_mr_seg { /* chunk descriptors */ + union { /* chunk memory handles */ + struct ib_mr *rl_mr; /* if registered directly */ + struct rpcrdma_mw { /* if registered from region */ + union { + struct ib_mw *mw; + struct ib_fmr *fmr; + } r; + struct list_head mw_list; + } *rl_mw; + } mr_chunk; + u64 mr_base; /* registration result */ + u32 mr_rkey; /* registration result */ + u32 mr_len; /* length of chunk or segment */ + int mr_nsegs; /* number of segments in chunk or 0 */ + enum dma_data_direction mr_dir; /* segment mapping direction */ + dma_addr_t mr_dma; /* segment mapping address */ + size_t mr_dmalen; /* segment mapping length */ + struct page *mr_page; /* owning page, if any */ + char *mr_offset; /* kva if no page, else offset */ +}; + +struct rpcrdma_req { + size_t rl_size; /* actual length of buffer */ + unsigned int rl_niovs; /* 0, 2 or 4 */ + unsigned int rl_nchunks; /* non-zero if chunks */ + struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ + struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ + struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ + struct ib_sge rl_send_iov[4]; /* for active requests */ + struct ib_sge rl_iov; /* for posting */ + struct ib_mr *rl_handle; /* handle for mem in rl_iov */ + char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ + __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ +}; +#define rpcr_to_rdmar(r) \ + container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) + +/* + * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for + * inline requests/replies, and client/server credits. + * + * One of these is associated with a transport instance + */ +struct rpcrdma_buffer { + spinlock_t rb_lock; /* protects indexes */ + atomic_t rb_credits; /* most recent server credits */ + unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ + int rb_max_requests;/* client max requests */ + struct list_head rb_mws; /* optional memory windows/fmrs */ + int rb_send_index; + struct rpcrdma_req **rb_send_bufs; + int rb_recv_index; + struct rpcrdma_rep **rb_recv_bufs; + char *rb_pool; +}; +#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) + +/* + * Internal structure for transport instance creation. This + * exists primarily for modularity. + * + * This data should be set with mount options + */ +struct rpcrdma_create_data_internal { + struct sockaddr_storage addr; /* RDMA server address */ + unsigned int max_requests; /* max requests (slots) in flight */ + unsigned int rsize; /* mount rsize - max read hdr+data */ + unsigned int wsize; /* mount wsize - max write hdr+data */ + unsigned int inline_rsize; /* max non-rdma read data payload */ + unsigned int inline_wsize; /* max non-rdma write data payload */ + unsigned int padding; /* non-rdma write header padding */ +}; + +#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) + +#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) + +#define RPCRDMA_INLINE_PAD_VALUE(rq)\ + rpcx_to_rdmad(rq->rq_task->tk_xprt).padding + +/* + * Statistics for RPCRDMA + */ +struct rpcrdma_stats { + unsigned long read_chunk_count; + unsigned long write_chunk_count; + unsigned long reply_chunk_count; + + unsigned long long total_rdma_request; + unsigned long long total_rdma_reply; + + unsigned long long pullup_copy_count; + unsigned long long fixup_copy_count; + unsigned long hardway_register_count; + unsigned long failed_marshal_count; + unsigned long bad_reply_count; +}; + +/* + * RPCRDMA transport -- encapsulates the structures above for + * integration with RPC. + * + * The contained structures are embedded, not pointers, + * for convenience. This structure need not be visible externally. + * + * It is allocated and initialized during mount, and released + * during unmount. + */ +struct rpcrdma_xprt { + struct rpc_xprt xprt; + struct rpcrdma_ia rx_ia; + struct rpcrdma_ep rx_ep; + struct rpcrdma_buffer rx_buf; + struct rpcrdma_create_data_internal rx_data; + struct delayed_work rdma_connect; + struct rpcrdma_stats rx_stats; +}; + +#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) +#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) + +/* + * Interface Adapter calls - xprtrdma/verbs.c + */ +int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); +void rpcrdma_ia_close(struct rpcrdma_ia *); + +/* + * Endpoint calls - xprtrdma/verbs.c + */ +int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); + +int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_req *); +int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_rep *); + +/* + * Buffer calls - xprtrdma/verbs.c + */ +int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, + struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); + +struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); +void rpcrdma_buffer_put(struct rpcrdma_req *); +void rpcrdma_recv_buffer_get(struct rpcrdma_req *); +void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); + +int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, + struct ib_mr **, struct ib_sge *); +int rpcrdma_deregister_internal(struct rpcrdma_ia *, + struct ib_mr *, struct ib_sge *); + +int rpcrdma_register_external(struct rpcrdma_mr_seg *, + int, int, struct rpcrdma_xprt *); +int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, + struct rpcrdma_xprt *, void *); + +/* + * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c + */ +void rpcrdma_conn_func(struct rpcrdma_ep *); +void rpcrdma_reply_handler(struct rpcrdma_rep *); + +/* + * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c + */ +int rpcrdma_marshal_req(struct rpc_rqst *); + +#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 282efd447a61..02298f529dad 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -13,10 +13,14 @@ * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> * * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> + * + * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. + * <gilles.quillard@bull.net> */ #include <linux/types.h> #include <linux/slab.h> +#include <linux/module.h> #include <linux/capability.h> #include <linux/pagemap.h> #include <linux/errno.h> @@ -28,6 +32,7 @@ #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xprtsock.h> #include <linux/file.h> #include <net/sock.h> @@ -260,14 +265,29 @@ struct sock_xprt { #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -static void xs_format_peer_addresses(struct rpc_xprt *xprt) +static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) +{ + return (struct sockaddr *) &xprt->addr; +} + +static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + return (struct sockaddr_in *) &xprt->addr; +} + +static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) +{ + return (struct sockaddr_in6 *) &xprt->addr; +} + +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; buf = kzalloc(20, GFP_KERNEL); if (buf) { - snprintf(buf, 20, "%u.%u.%u.%u", + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -279,26 +299,123 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - if (xprt->prot == IPPROTO_UDP) - xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; - else - xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; buf = kzalloc(48, GFP_KERNEL); if (buf) { - snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) { + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) { + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? + RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); +} + +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in6 *addr = xs_addr_in6(xprt); + char *buf; + + buf = kzalloc(40, GFP_KERNEL); + if (buf) { + snprintf(buf, 40, NIP6_FMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + + buf = kzalloc(64, GFP_KERNEL); + if (buf) { + snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(36, GFP_KERNEL); + if (buf) { + snprintf(buf, 36, NIP6_SEQFMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(50, GFP_KERNEL); + if (buf) { + snprintf(buf, 50, NIP6_FMT".%u.%u", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? + RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + int i; + + for (i = 0; i < RPC_DISPLAY_MAX; i++) + kfree(xprt->address_strings[i]); } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -463,19 +580,20 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, - (struct sockaddr *) &xprt->addr, + xs_addr(xprt), xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); - if (likely(status >= (int) req->rq_slen)) - return 0; - - /* Still some bytes left; set up for a retry later. */ - if (status > 0) + if (status >= 0) { + task->tk_bytes_sent += status; + if (status >= req->rq_slen) + return 0; + /* Still some bytes left; set up for a retry later. */ status = -EAGAIN; + } switch (status) { case -ENETUNREACH: @@ -523,7 +641,8 @@ static int xs_tcp_send_request(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - int status, retry = 0; + int status; + unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -661,6 +780,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xs_free_peer_addresses(xprt); kfree(xprt->slot); kfree(xprt); + module_put(THIS_MODULE); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -1139,14 +1259,23 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + struct sockaddr *addr = xs_addr(xprt); dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - sap->sin_port = htons(port); + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + break; + default: + BUG(); + } } -static int xs_bind(struct sock_xprt *transport, struct socket *sock) +static int xs_bind4(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, @@ -1174,8 +1303,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) else port--; } while (err == -EADDRINUSE && port != transport->port); - dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", - NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); + dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", + __FUNCTION__, NIPQUAD(myaddr.sin_addr), + port, err ? "failed" : "ok", err); + return err; +} + +static int xs_bind6(struct sock_xprt *transport, struct socket *sock) +{ + struct sockaddr_in6 myaddr = { + .sin6_family = AF_INET6, + }; + struct sockaddr_in6 *sa; + int err; + unsigned short port = transport->port; + + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in6 *)&transport->addr; + myaddr.sin6_addr = sa->sin6_addr; + do { + myaddr.sin6_port = htons(port); + err = kernel_bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (!transport->xprt.resvport) + break; + if (err == 0) { + transport->port = port; + break; + } + if (port <= xprt_min_resvport) + port = xprt_max_resvport; + else + port--; + } while (err == -EADDRINUSE && port != transport->port); + dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", + NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err); return err; } @@ -1183,38 +1346,69 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; + BUG_ON(sock_owned_by_user(sk)); - switch (sk->sk_family) { - case AF_INET: - sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", - &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); - break; + sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", + &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); +} - case AF_INET6: - sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", - &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); - break; +static inline void xs_reclassify_socket6(struct socket *sock) +{ + struct sock *sk = sock->sk; - default: - BUG(); - } + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", + &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } #else -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) +{ +} + +static inline void xs_reclassify_socket6(struct socket *sock) { } #endif +static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + + xprt_set_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + xs_udp_do_set_buffer_size(xprt); +} + /** - * xs_udp_connect_worker - set up a UDP socket + * xs_udp_connect_worker4 - set up a UDP socket * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker(struct work_struct *work) +static void xs_udp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1232,9 +1426,9 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock)) { sock_release(sock); goto out; } @@ -1242,29 +1436,48 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; + xs_udp_finish_connecting(xprt, sock); + status = 0; +out: + xprt_wake_pending_tasks(xprt, status); + xprt_clear_connecting(xprt); +} - write_lock_bh(&sk->sk_callback_lock); +/** + * xs_udp_connect_worker6 - set up a UDP socket + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_udp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_udp_data_ready; - sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; - sk->sk_allocation = GFP_ATOMIC; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_set_connected(xprt); + /* Start by resetting any existing state */ + xs_close(xprt); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - write_unlock_bh(&sk->sk_callback_lock); + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; } - xs_udp_do_set_buffer_size(xprt); + + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + + xs_udp_finish_connecting(xprt, sock); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1295,13 +1508,52 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) result); } +static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ + sk->sk_userlocks |= SOCK_BINDPORT_LOCK; + sock_reset_flag(sk, SOCK_LINGER); + tcp_sk(sk)->linger2 = 0; + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); +} + /** - * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1315,13 +1567,12 @@ static void xs_tcp_connect_worker(struct work_struct *work) if (!sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport " - "socket (%d).\n", -err); + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock) < 0) { sock_release(sock); goto out; } @@ -1332,43 +1583,70 @@ static void xs_tcp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); + status = xs_tcp_finish_connecting(xprt, sock); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), + sock->sk->sk_state); + if (status < 0) { + switch (status) { + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_close(xprt); + break; + } + } +out: + xprt_wake_pending_tasks(xprt, status); +out_clear: + xprt_clear_connecting(xprt); +} - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_tcp_data_ready; - sk->sk_state_change = xs_tcp_state_change; - sk->sk_write_space = xs_tcp_write_space; - sk->sk_allocation = GFP_ATOMIC; +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_clear_connected(xprt); + if (!sock) { + /* start from scratch */ + if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; + } + } else + /* "close" the socket, preserving the local port */ + xs_tcp_reuse_connection(xprt); - write_unlock_bh(&sk->sk_callback_lock); - } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; - status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, - xprt->addrlen, O_NONBLOCK); + status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), - sock->sk->sk_state); + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { switch (status) { case -EINPROGRESS: @@ -1508,7 +1786,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) +static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, + unsigned int slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; @@ -1549,8 +1828,9 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_udp(struct xprt_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1559,15 +1839,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1580,11 +1856,37 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - return xprt; + if (try_module_get(THIS_MODULE)) + return xprt; + + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); } /** @@ -1592,8 +1894,9 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1602,14 +1905,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1622,15 +1921,55 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - return xprt; + if (try_module_get(THIS_MODULE)) + return xprt; + + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); } +static struct xprt_class xs_udp_transport = { + .list = LIST_HEAD_INIT(xs_udp_transport.list), + .name = "udp", + .owner = THIS_MODULE, + .ident = IPPROTO_UDP, + .setup = xs_setup_udp, +}; + +static struct xprt_class xs_tcp_transport = { + .list = LIST_HEAD_INIT(xs_tcp_transport.list), + .name = "tcp", + .owner = THIS_MODULE, + .ident = IPPROTO_TCP, + .setup = xs_setup_tcp, +}; + /** - * init_socket_xprt - set up xprtsock's sysctls + * init_socket_xprt - set up xprtsock's sysctls, register with RPC client * */ int init_socket_xprt(void) @@ -1640,11 +1979,14 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_udp_transport); + xprt_register_transport(&xs_tcp_transport); + return 0; } /** - * cleanup_socket_xprt - remove xprtsock's sysctls + * cleanup_socket_xprt - remove xprtsock's sysctls, unregister * */ void cleanup_socket_xprt(void) @@ -1655,4 +1997,7 @@ void cleanup_socket_xprt(void) sunrpc_table_header = NULL; } #endif + + xprt_unregister_transport(&xs_udp_transport); + xprt_unregister_transport(&xs_tcp_transport); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3c3fff33d1ce..cf76150e623e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3932,7 +3932,7 @@ out: } static unsigned int selinux_ip_postroute_last(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *), @@ -3941,7 +3941,6 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, char *addrp; int len, err = 0; struct sock *sk; - struct sk_buff *skb = *pskb; struct avc_audit_data ad; struct net_device *dev = (struct net_device *)out; struct sk_security_struct *sksec; @@ -3977,23 +3976,23 @@ out: } static unsigned int selinux_ipv4_postroute_last(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET); + return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static unsigned int selinux_ipv6_postroute_last(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6); + return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET6); } #endif /* IPV6 */ |