diff options
1049 files changed, 36434 insertions, 8000 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 3c384c0cf86e..4dc28cc93503 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -34,6 +34,8 @@ ext2.txt - info, mount options and specifications for the Ext2 filesystem. ext3.txt - info, mount options and specifications for the Ext3 filesystem. +ext4.txt + - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. fuse.txt diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt new file mode 100644 index 000000000000..6a4adcae9f9a --- /dev/null +++ b/Documentation/filesystems/ext4.txt @@ -0,0 +1,236 @@ + +Ext4 Filesystem +=============== + +This is a development version of the ext4 filesystem, an advanced level +of the ext3 filesystem which incorporates scalability and reliability +enhancements for supporting large filesystems (64 bit) in keeping with +increasing disk capacities and state-of-the-art feature requirements. + +Mailing list: linux-ext4@vger.kernel.org + + +1. Quick usage instructions: +=========================== + + - Grab updated e2fsprogs from + ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs-interim/ + This is a patchset on top of e2fsprogs-1.39, which can be found at + ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/ + + - It's still mke2fs -j /dev/hda1 + + - mount /dev/hda1 /wherever -t ext4dev + + - To enable extents, + + mount /dev/hda1 /wherever -t ext4dev -o extents + + - The filesystem is compatible with the ext3 driver until you add a file + which has extents (ie: `mount -o extents', then create a file). + + NOTE: The "extents" mount flag is temporary. It will soon go away and + extents will be enabled by the "-o extents" flag to mke2fs or tune2fs + + - When comparing performance with other filesystems, remember that + ext3/4 by default offers higher data integrity guarantees than most. So + when comparing with a metadata-only journalling filesystem, use `mount -o + data=writeback'. And you might as well use `mount -o nobh' too along + with it. Making the journal larger than the mke2fs default often helps + performance with metadata-intensive workloads. + +2. Features +=========== + +2.1 Currently available + +* ability to use filesystems > 16TB +* extent format reduces metadata overhead (RAM, IO for access, transactions) +* extent format more robust in face of on-disk corruption due to magics, +* internal redunancy in tree + +2.1 Previously available, soon to be enabled by default by "mkefs.ext4": + +* dir_index and resize inode will be on by default +* large inodes will be used by default for fast EAs, nsec timestamps, etc + +2.2 Candidate features for future inclusion + +There are several under discussion, whether they all make it in is +partly a function of how much time everyone has to work on them: + +* improved file allocation (multi-block alloc, delayed alloc; basically done) +* fix 32000 subdirectory limit (patch exists, needs some e2fsck work) +* nsec timestamps for mtime, atime, ctime, create time (patch exists, + needs some e2fsck work) +* inode version field on disk (NFSv4, Lustre; prototype exists) +* reduced mke2fs/e2fsck time via uninitialized groups (prototype exists) +* journal checksumming for robustness, performance (prototype exists) +* persistent file preallocation (e.g for streaming media, databases) + +Features like metadata checksumming have been discussed and planned for +a bit but no patches exist yet so I'm not sure they're in the near-term +roadmap. + +The big performance win will come with mballoc and delalloc. CFS has +been using mballoc for a few years already with Lustre, and IBM + Bull +did a lot of benchmarking on it. The reason it isn't in the first set of +patches is partly a manageability issue, and partly because it doesn't +directly affect the on-disk format (outside of much better allocation) +so it isn't critical to get into the first round of changes. I believe +Alex is working on a new set of patches right now. + +3. Options +========== + +When mounting an ext4 filesystem, the following option are accepted: +(*) == default + +extents ext4 will use extents to address file data. The + file system will no longer be mountable by ext3. + +journal=update Update the ext4 file system's journal to the current + format. + +journal=inum When a journal already exists, this option is ignored. + Otherwise, it specifies the number of the inode which + will represent the ext4 file system's journal file. + +journal_dev=devnum When the external journal device's major/minor numbers + have changed, this option allows the user to specify + the new journal location. The journal device is + identified through its new major/minor numbers encoded + in devnum. + +noload Don't load the journal on mounting. + +data=journal All data are committed into the journal prior to being + written into the main file system. + +data=ordered (*) All data are forced directly out to the main file + system prior to its metadata being committed to the + journal. + +data=writeback Data ordering is not preserved, data may be written + into the main file system after its metadata has been + committed to the journal. + +commit=nrsec (*) Ext4 can be told to sync all its data and metadata + every 'nrsec' seconds. The default value is 5 seconds. + This means that if you lose your power, you will lose + as much as the latest 5 seconds of work (your + filesystem will not be damaged though, thanks to the + journaling). This default value (or any low value) + will hurt performance, but it's good for data-safety. + Setting it to 0 will have the same effect as leaving + it at the default (5 seconds). + Setting it to very large values will improve + performance. + +barrier=1 This enables/disables barriers. barrier=0 disables + it, barrier=1 enables it. + +orlov (*) This enables the new Orlov block allocator. It is + enabled by default. + +oldalloc This disables the Orlov block allocator and enables + the old block allocator. Orlov should have better + performance - we'd like to get some feedback if it's + the contrary for you. + +user_xattr Enables Extended User Attributes. Additionally, you + need to have extended attribute support enabled in the + kernel configuration (CONFIG_EXT4_FS_XATTR). See the + attr(5) manual page and http://acl.bestbits.at/ to + learn more about extended attributes. + +nouser_xattr Disables Extended User Attributes. + +acl Enables POSIX Access Control Lists support. + Additionally, you need to have ACL support enabled in + the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL). + See the acl(5) manual page and http://acl.bestbits.at/ + for more information. + +noacl This option disables POSIX Access Control List + support. + +reservation + +noreservation + +bsddf (*) Make 'df' act like BSD. +minixdf Make 'df' act like Minix. + +check=none Don't do extra checking of bitmaps on mount. +nocheck + +debug Extra debugging information is sent to syslog. + +errors=remount-ro(*) Remount the filesystem read-only on an error. +errors=continue Keep going on a filesystem error. +errors=panic Panic and halt the machine if an error occurs. + +grpid Give objects the same group ID as their creator. +bsdgroups + +nogrpid (*) New objects have the group ID of their creator. +sysvgroups + +resgid=n The group ID which may use the reserved blocks. + +resuid=n The user ID which may use the reserved blocks. + +sb=n Use alternate superblock at this location. + +quota +noquota +grpquota +usrquota + +bh (*) ext4 associates buffer heads to data pages to +nobh (a) cache disk block mapping information + (b) link pages into transaction to provide + ordering guarantees. + "bh" option forces use of buffer heads. + "nobh" option tries to avoid associating buffer + heads (supported only for "writeback" mode). + + +Data Mode +--------- +There are 3 different data modes: + +* writeback mode +In data=writeback mode, ext4 does not journal data at all. This mode provides +a similar level of journaling as that of XFS, JFS, and ReiserFS in its default +mode - metadata journaling. A crash+recovery can cause incorrect data to +appear in files which were written shortly before the crash. This mode will +typically provide the best ext4 performance. + +* ordered mode +In data=ordered mode, ext4 only officially journals metadata, but it logically +groups metadata and data blocks into a single unit called a transaction. When +it's time to write the new metadata out to disk, the associated data blocks +are written first. In general, this mode performs slightly slower than +writeback but significantly faster than journal mode. + +* journal mode +data=journal mode provides full data and metadata journaling. All new data is +written to the journal first, and then to its final location. +In the event of a crash, the journal can be replayed, bringing both data and +metadata into a consistent state. This mode is the slowest except when data +needs to be read from and written to disk at the same time where it +outperforms all others modes. + +References +========== + +kernel source: <file:fs/ext4/> + <file:fs/jbd2/> + +programs: http://e2fsprogs.sourceforge.net/ + http://ext2resize.sourceforge.net + +useful links: http://fedoraproject.org/wiki/ext3-devel + http://www.bullopensource.org/ext4/ diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt index dab123db5a4f..488773018152 100644 --- a/Documentation/lockdep-design.txt +++ b/Documentation/lockdep-design.txt @@ -50,10 +50,10 @@ The bit position indicates hardirq, softirq, hardirq-read, softirq-read respectively, and the character displayed in each indicates: - '.' acquired while irqs enabled + '.' acquired while irqs disabled '+' acquired in irq context - '-' acquired in process context with irqs disabled - '?' read-acquired both with irqs enabled and in irq context + '-' acquired with irqs enabled + '?' read acquired in irq context with irqs enabled. Unused mutexes cannot be part of the cause of an error. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 89bf8c20a586..0bc7f1e3c9e6 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -86,7 +86,7 @@ valid for 30 seconds. core_pattern: core_pattern is used to specify a core dumpfile pattern name. -. max length 64 characters; default value is "core" +. max length 128 characters; default value is "core" . core_pattern is used as a pattern template for the output filename; certain string patterns (beginning with '%') are substituted with their actual values. @@ -105,6 +105,9 @@ core_pattern is used to specify a core dumpfile pattern name. %h hostname %e executable filename %<OTHER> both are dropped +. If the first character of the pattern is a '|', the kernel will treat + the rest of the pattern as a command to run. The core dump will be + written to the standard input of that program instead of to a file. ============================================================== diff --git a/MAINTAINERS b/MAINTAINERS index 84a018ee4556..931e6e40c08b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2261,6 +2261,17 @@ T: git kernel.org:/pub/scm/linux/kernel/git/kyle/parisc-2.6.git T: cvs cvs.parisc-linux.org:/var/cvs/linux-2.6 S: Maintained +PC87360 HARDWARE MONITORING DRIVER +P: Jim Cromie +M: jim.cromie@gmail.com +L: lm-sensors@lm-sensors.org +S: Maintained + +PC8736x GPIO DRIVER +P: Jim Cromie +M: jim.cromie@gmail.com +S: Maintained + PCI ERROR RECOVERY P: Linas Vepstas M: linas@austin.ibm.com @@ -2592,10 +2603,19 @@ L: lksctp-developers@lists.sourceforge.net S: Supported SCx200 CPU SUPPORT -P: Christer Weinigel -M: christer@weinigel.se -W: http://www.weinigel.se -S: Supported +P: Jim Cromie +M: jim.cromie@gmail.com +S: Odd Fixes + +SCx200 GPIO DRIVER +P: Jim Cromie +M: jim.cromie@gmail.com +S: Maintained + +SCx200 HRT CLOCKSOURCE DRIVER +P: Jim Cromie +M: jim.cromie@gmail.com +S: Maintained SECURITY CONTACT P: Security Officers @@ -2760,14 +2780,7 @@ S: Maintained UltraSPARC (sparc64): P: David S. Miller M: davem@davemloft.net -P: Eddie C. Dost -M: ecd@brainaid.de -P: Jakub Jelinek -M: jj@sunsite.ms.mff.cuni.cz -P: Anton Blanchard -M: anton@samba.org L: sparclinux@vger.kernel.org -L: ultralinux@vger.kernel.org T: git kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6.git S: Maintained @@ -741,6 +741,9 @@ endif # ifdef CONFIG_KALLSYMS # vmlinux image - including updated kernel symbols vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE +ifdef CONFIG_HEADERS_CHECK + $(Q)$(MAKE) headers_check +endif $(call if_changed_rule,vmlinux__) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ $(Q)rm -f .old_version diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c index 8b02420f732e..692809e4aece 100644 --- a/arch/alpha/kernel/alpha_ksyms.c +++ b/arch/alpha/kernel/alpha_ksyms.c @@ -6,41 +6,14 @@ */ #include <linux/module.h> -#include <linux/string.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/socket.h> -#include <linux/syscalls.h> -#include <linux/in.h> -#include <linux/in6.h> -#include <linux/pci.h> -#include <linux/screen_info.h> -#include <linux/tty.h> -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> - -#include <asm/io.h> #include <asm/console.h> -#include <asm/hwrpb.h> #include <asm/uaccess.h> -#include <asm/processor.h> #include <asm/checksum.h> -#include <linux/interrupt.h> #include <asm/fpu.h> -#include <asm/irq.h> #include <asm/machvec.h> -#include <asm/pgalloc.h> -#include <asm/semaphore.h> -#include <asm/tlbflush.h> -#include <asm/cacheflush.h> -#include <asm/vga.h> #include <asm/unistd.h> -extern struct hwrpb_struct *hwrpb; -extern spinlock_t rtc_lock; - /* these are C runtime functions with special calling conventions: */ extern void __divl (void); extern void __reml (void); @@ -52,14 +25,9 @@ extern void __divqu (void); extern void __remqu (void); EXPORT_SYMBOL(alpha_mv); -EXPORT_SYMBOL(screen_info); -EXPORT_SYMBOL(perf_irq); EXPORT_SYMBOL(callback_getenv); EXPORT_SYMBOL(callback_setenv); EXPORT_SYMBOL(callback_save_env); -#ifdef CONFIG_ALPHA_GENERIC -EXPORT_SYMBOL(alpha_using_srm); -#endif /* CONFIG_ALPHA_GENERIC */ /* platform dependent support */ EXPORT_SYMBOL(strcat); @@ -77,47 +45,14 @@ EXPORT_SYMBOL(__constant_c_memset); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); -EXPORT_SYMBOL(__direct_map_base); -EXPORT_SYMBOL(__direct_map_size); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); -EXPORT_SYMBOL(pci_map_single); -EXPORT_SYMBOL(pci_map_page); -EXPORT_SYMBOL(pci_unmap_single); -EXPORT_SYMBOL(pci_unmap_page); -EXPORT_SYMBOL(pci_map_sg); -EXPORT_SYMBOL(pci_unmap_sg); -EXPORT_SYMBOL(pci_dma_supported); -EXPORT_SYMBOL(pci_dac_dma_supported); -EXPORT_SYMBOL(pci_dac_page_to_dma); -EXPORT_SYMBOL(pci_dac_dma_to_page); -EXPORT_SYMBOL(pci_dac_dma_to_offset); -EXPORT_SYMBOL(alpha_gendev_to_pci); -#endif -EXPORT_SYMBOL(dma_set_mask); - -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_elf_thread); -EXPORT_SYMBOL(dump_elf_task); -EXPORT_SYMBOL(dump_elf_task_fp); -EXPORT_SYMBOL(hwrpb); -EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(alpha_read_fp_reg); EXPORT_SYMBOL(alpha_read_fp_reg_s); EXPORT_SYMBOL(alpha_write_fp_reg); EXPORT_SYMBOL(alpha_write_fp_reg_s); -/* In-kernel system calls. */ +/* entry.S */ EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(sys_dup); -EXPORT_SYMBOL(sys_exit); -EXPORT_SYMBOL(sys_write); -EXPORT_SYMBOL(sys_lseek); EXPORT_SYMBOL(kernel_execve); -EXPORT_SYMBOL(sys_setsid); -EXPORT_SYMBOL(sys_wait4); /* Networking helper routines. */ EXPORT_SYMBOL(csum_tcpudp_magic); @@ -134,10 +69,6 @@ EXPORT_SYMBOL(alpha_fp_emul_imprecise); EXPORT_SYMBOL(alpha_fp_emul); #endif -#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK -EXPORT_SYMBOL(__min_ipl); -#endif - /* * The following are specially called from the uaccess assembly stubs. */ @@ -160,27 +91,10 @@ EXPORT_SYMBOL(up); */ #ifdef CONFIG_SMP -EXPORT_SYMBOL(flush_tlb_mm); -EXPORT_SYMBOL(flush_tlb_range); -EXPORT_SYMBOL(flush_tlb_page); -EXPORT_SYMBOL(smp_imb); -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(smp_num_cpus); -EXPORT_SYMBOL(smp_call_function); -EXPORT_SYMBOL(smp_call_function_on_cpu); EXPORT_SYMBOL(_atomic_dec_and_lock); #endif /* CONFIG_SMP */ /* - * NUMA specific symbols - */ -#ifdef CONFIG_DISCONTIGMEM -EXPORT_SYMBOL(node_data); -#endif /* CONFIG_DISCONTIGMEM */ - -EXPORT_SYMBOL(rtc_lock); - -/* * The following are special because they're not called * explicitly (the C compiler or assembler generates them in * response to division operations). Fortunately, their @@ -200,8 +114,3 @@ EXPORT_SYMBOL(__remqu); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memchr); - -#ifdef CONFIG_ALPHA_IRONGATE -EXPORT_SYMBOL(irongate_ioremap); -EXPORT_SYMBOL(irongate_iounmap); -#endif diff --git a/arch/alpha/kernel/core_apecs.c b/arch/alpha/kernel/core_apecs.c index a27ba12ba35e..ca46b2c24457 100644 --- a/arch/alpha/kernel/core_apecs.c +++ b/arch/alpha/kernel/core_apecs.c @@ -387,8 +387,7 @@ apecs_pci_clr_err(void) } void -apecs_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +apecs_machine_check(unsigned long vector, unsigned long la_ptr) { struct el_common *mchk_header; struct el_apecs_procdata *mchk_procdata; @@ -412,7 +411,7 @@ apecs_machine_check(unsigned long vector, unsigned long la_ptr, wrmces(0x7); /* reset machine check pending flag */ mb(); - process_mcheck_info(vector, la_ptr, regs, "APECS", + process_mcheck_info(vector, la_ptr, "APECS", (mcheck_expected(0) && (mchk_sysdata->epic_dcsr & 0x0c00UL))); } diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c index fd563064363c..1d6ee6c985f9 100644 --- a/arch/alpha/kernel/core_cia.c +++ b/arch/alpha/kernel/core_cia.c @@ -1192,8 +1192,7 @@ cia_decode_mchk(unsigned long la_ptr) } void -cia_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +cia_machine_check(unsigned long vector, unsigned long la_ptr) { int expected; @@ -1208,5 +1207,5 @@ cia_machine_check(unsigned long vector, unsigned long la_ptr, expected = mcheck_expected(0); if (!expected && vector == 0x660) expected = cia_decode_mchk(la_ptr); - process_mcheck_info(vector, la_ptr, regs, "CIA", expected); + process_mcheck_info(vector, la_ptr, "CIA", expected); } diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 138d497d1cca..e4a0bcf1d28b 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -404,6 +404,7 @@ irongate_ioremap(unsigned long addr, unsigned long size) #endif return (void __iomem *)vaddr; } +EXPORT_SYMBOL(irongate_ioremap); void irongate_iounmap(volatile void __iomem *xaddr) @@ -414,3 +415,4 @@ irongate_iounmap(volatile void __iomem *xaddr) if (addr) return vfree((void *)(PAGE_MASK & addr)); } +EXPORT_SYMBOL(irongate_iounmap); diff --git a/arch/alpha/kernel/core_lca.c b/arch/alpha/kernel/core_lca.c index 6a5a9145c676..4843f6ec9f3a 100644 --- a/arch/alpha/kernel/core_lca.c +++ b/arch/alpha/kernel/core_lca.c @@ -19,6 +19,7 @@ #include <linux/tty.h> #include <asm/ptrace.h> +#include <asm/irq_regs.h> #include <asm/smp.h> #include "proto.h" @@ -386,8 +387,7 @@ ioc_error(__u32 stat0, __u32 stat1) } void -lca_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs *regs) +lca_machine_check(unsigned long vector, unsigned long la_ptr) { const char * reason; union el_lca el; @@ -397,7 +397,7 @@ lca_machine_check(unsigned long vector, unsigned long la_ptr, wrmces(rdmces()); /* reset machine check pending flag */ printk(KERN_CRIT "LCA machine check: vector=%#lx pc=%#lx code=%#x\n", - vector, regs->pc, (unsigned int) el.c->code); + vector, get_irq_regs()->pc, (unsigned int) el.c->code); /* * The first quadword after the common header always seems to diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c index 28849c894153..8d019071190a 100644 --- a/arch/alpha/kernel/core_mcpcia.c +++ b/arch/alpha/kernel/core_mcpcia.c @@ -572,8 +572,7 @@ mcpcia_print_system_area(unsigned long la_ptr) } void -mcpcia_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +mcpcia_machine_check(unsigned long vector, unsigned long la_ptr) { struct el_common *mchk_header; struct el_MCPCIA_uncorrected_frame_mcheck *mchk_logout; @@ -610,7 +609,7 @@ mcpcia_machine_check(unsigned long vector, unsigned long la_ptr, wrmces(0x7); mb(); - process_mcheck_info(vector, la_ptr, regs, "MCPCIA", expected != 0); + process_mcheck_info(vector, la_ptr, "MCPCIA", expected != 0); if (!expected && vector != 0x620 && vector != 0x630) { mcpcia_print_uncorrectable(mchk_logout); mcpcia_print_system_area(la_ptr); diff --git a/arch/alpha/kernel/core_polaris.c b/arch/alpha/kernel/core_polaris.c index 277674a500ff..c5a271d37abd 100644 --- a/arch/alpha/kernel/core_polaris.c +++ b/arch/alpha/kernel/core_polaris.c @@ -187,8 +187,7 @@ polaris_pci_clr_err(void) } void -polaris_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +polaris_machine_check(unsigned long vector, unsigned long la_ptr) { /* Clear the error before any reporting. */ mb(); @@ -198,6 +197,6 @@ polaris_machine_check(unsigned long vector, unsigned long la_ptr, wrmces(0x7); mb(); - process_mcheck_info(vector, la_ptr, regs, "POLARIS", + process_mcheck_info(vector, la_ptr, "POLARIS", mcheck_expected(0)); } diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c index ecce09e3626a..f5ca5255eb06 100644 --- a/arch/alpha/kernel/core_t2.c +++ b/arch/alpha/kernel/core_t2.c @@ -551,8 +551,7 @@ t2_clear_errors(int cpu) * Hence all the taken/expected/any_expected/last_taken stuff... */ void -t2_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +t2_machine_check(unsigned long vector, unsigned long la_ptr) { int cpu = smp_processor_id(); #ifdef CONFIG_VERBOSE_MCHECK @@ -618,5 +617,5 @@ t2_machine_check(unsigned long vector, unsigned long la_ptr, } #endif - process_mcheck_info(vector, la_ptr, regs, "T2", mcheck_expected(cpu)); + process_mcheck_info(vector, la_ptr, "T2", mcheck_expected(cpu)); } diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index 8aa305bd6a2c..ce623c6e55e1 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -443,8 +443,7 @@ tsunami_pci_clr_err(void) } void -tsunami_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +tsunami_machine_check(unsigned long vector, unsigned long la_ptr) { /* Clear error before any reporting. */ mb(); @@ -454,6 +453,6 @@ tsunami_machine_check(unsigned long vector, unsigned long la_ptr, wrmces(0x7); mb(); - process_mcheck_info(vector, la_ptr, regs, "TSUNAMI", + process_mcheck_info(vector, la_ptr, "TSUNAMI", mcheck_expected(smp_processor_id())); } diff --git a/arch/alpha/kernel/core_wildfire.c b/arch/alpha/kernel/core_wildfire.c index 2b767a1bad96..7e072443d7fd 100644 --- a/arch/alpha/kernel/core_wildfire.c +++ b/arch/alpha/kernel/core_wildfire.c @@ -322,8 +322,7 @@ wildfire_init_arch(void) } void -wildfire_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +wildfire_machine_check(unsigned long vector, unsigned long la_ptr) { mb(); mb(); /* magic */ @@ -332,7 +331,7 @@ wildfire_machine_check(unsigned long vector, unsigned long la_ptr, wrmces(0x7); mb(); - process_mcheck_info(vector, la_ptr, regs, "WILDFIRE", + process_mcheck_info(vector, la_ptr, "WILDFIRE", mcheck_expected(smp_processor_id())); } diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 64f59f2fcf5c..69b5f4ea7355 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <asm/io.h> +#include <asm/irq_regs.h> #include <asm/hwrpb.h> #include <asm/smp.h> #include <asm/err_common.h> @@ -229,7 +230,7 @@ ev6_process_logout_frame(struct el_common *mchk_header, int print) } void -ev6_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) +ev6_machine_check(u64 vector, u64 la_ptr) { struct el_common *mchk_header = (struct el_common *)la_ptr; @@ -260,7 +261,7 @@ ev6_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) (unsigned int)vector, (int)smp_processor_id()); ev6_process_logout_frame(mchk_header, 1); - dik_show_regs(regs, NULL); + dik_show_regs(get_irq_regs(), NULL); err_print_prefix = saved_err_prefix; } diff --git a/arch/alpha/kernel/err_ev7.c b/arch/alpha/kernel/err_ev7.c index fed6b3d1b803..95463ab1cf35 100644 --- a/arch/alpha/kernel/err_ev7.c +++ b/arch/alpha/kernel/err_ev7.c @@ -118,7 +118,7 @@ ev7_collect_logout_frame_subpackets(struct el_subpacket *el_ptr, } void -ev7_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) +ev7_machine_check(u64 vector, u64 la_ptr) { struct el_subpacket *el_ptr = (struct el_subpacket *)la_ptr; char *saved_err_prefix = err_print_prefix; diff --git a/arch/alpha/kernel/err_impl.h b/arch/alpha/kernel/err_impl.h index 64e9b73809fa..3c12258158e6 100644 --- a/arch/alpha/kernel/err_impl.h +++ b/arch/alpha/kernel/err_impl.h @@ -60,26 +60,26 @@ extern struct ev7_lf_subpackets * ev7_collect_logout_frame_subpackets(struct el_subpacket *, struct ev7_lf_subpackets *); extern void ev7_register_error_handlers(void); -extern void ev7_machine_check(u64, u64, struct pt_regs *); +extern void ev7_machine_check(u64, u64); /* * err_ev6.c */ extern void ev6_register_error_handlers(void); extern int ev6_process_logout_frame(struct el_common *, int); -extern void ev6_machine_check(u64, u64, struct pt_regs *); +extern void ev6_machine_check(u64, u64); /* * err_marvel.c */ -extern void marvel_machine_check(u64, u64, struct pt_regs *); +extern void marvel_machine_check(u64, u64); extern void marvel_register_error_handlers(void); /* * err_titan.c */ extern int titan_process_logout_frame(struct el_common *, int); -extern void titan_machine_check(u64, u64, struct pt_regs *); +extern void titan_machine_check(u64, u64); extern void titan_register_error_handlers(void); extern int privateer_process_logout_frame(struct el_common *, int); -extern void privateer_machine_check(u64, u64, struct pt_regs *); +extern void privateer_machine_check(u64, u64); diff --git a/arch/alpha/kernel/err_marvel.c b/arch/alpha/kernel/err_marvel.c index 70b38b1d2af3..f2956ac8dccc 100644 --- a/arch/alpha/kernel/err_marvel.c +++ b/arch/alpha/kernel/err_marvel.c @@ -1042,7 +1042,7 @@ marvel_process_logout_frame(struct ev7_lf_subpackets *lf_subpackets, int print) } void -marvel_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) +marvel_machine_check(u64 vector, u64 la_ptr) { struct el_subpacket *el_ptr = (struct el_subpacket *)la_ptr; int (*process_frame)(struct ev7_lf_subpackets *, int) = NULL; @@ -1077,7 +1077,7 @@ marvel_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) default: /* Don't know it - pass it up. */ - ev7_machine_check(vector, la_ptr, regs); + ev7_machine_check(vector, la_ptr); return; } diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c index 7e6720d45f02..febe71c6869f 100644 --- a/arch/alpha/kernel/err_titan.c +++ b/arch/alpha/kernel/err_titan.c @@ -379,7 +379,7 @@ titan_process_logout_frame(struct el_common *mchk_header, int print) } void -titan_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) +titan_machine_check(u64 vector, u64 la_ptr) { struct el_common *mchk_header = (struct el_common *)la_ptr; struct el_TITAN_sysdata_mcheck *tmchk = @@ -408,7 +408,7 @@ titan_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) * Only handle system errors here */ if ((vector != SCB_Q_SYSMCHK) && (vector != SCB_Q_SYSERR)) { - ev6_machine_check(vector, la_ptr, regs); + ev6_machine_check(vector, la_ptr); return; } @@ -442,7 +442,7 @@ titan_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) #ifdef CONFIG_VERBOSE_MCHECK titan_process_logout_frame(mchk_header, alpha_verbose_mcheck); if (alpha_verbose_mcheck) - dik_show_regs(regs, NULL); + dik_show_regs(get_irq_regs(), NULL); #endif /* CONFIG_VERBOSE_MCHECK */ err_print_prefix = saved_err_prefix; @@ -452,7 +452,7 @@ titan_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) * machine checks to interrupts */ irqmask = tmchk->c_dirx & TITAN_MCHECK_INTERRUPT_MASK; - titan_dispatch_irqs(irqmask, regs); + titan_dispatch_irqs(irqmask); } @@ -701,7 +701,7 @@ privateer_process_logout_frame(struct el_common *mchk_header, int print) } void -privateer_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) +privateer_machine_check(u64 vector, u64 la_ptr) { struct el_common *mchk_header = (struct el_common *)la_ptr; struct el_TITAN_sysdata_mcheck *tmchk = @@ -723,7 +723,7 @@ privateer_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) * Only handle system events here. */ if (vector != SCB_Q_SYSEVENT) - return titan_machine_check(vector, la_ptr, regs); + return titan_machine_check(vector, la_ptr); /* * Report the event - System Events should be reported even if no @@ -746,7 +746,7 @@ privateer_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs) /* * Dispatch the interrupt(s). */ - titan_dispatch_irqs(irqmask, regs); + titan_dispatch_irqs(irqmask); /* * Release the logout frame. diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 729c475d2269..facf82a5499a 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -127,7 +127,7 @@ unlock: #define MAX_ILLEGAL_IRQS 16 void -handle_irq(int irq, struct pt_regs * regs) +handle_irq(int irq) { /* * We ack quickly, we don't want the irq controller @@ -157,6 +157,6 @@ handle_irq(int irq, struct pt_regs * regs) * at IPL 0. */ local_irq_disable(); - __do_IRQ(irq, regs); + __do_IRQ(irq); irq_exit(); } diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index ddf5cf8dcb0b..e16aeb6e79ef 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/irq.h> #include <linux/kernel_stat.h> +#include <linux/module.h> #include <asm/machvec.h> #include <asm/dma.h> @@ -16,6 +17,7 @@ /* Hack minimum IPL during interrupt processing for broken hardware. */ #ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK int __min_ipl; +EXPORT_SYMBOL(__min_ipl); #endif /* @@ -30,6 +32,7 @@ dummy_perf(unsigned long vector, struct pt_regs *regs) } void (*perf_irq)(unsigned long, struct pt_regs *) = dummy_perf; +EXPORT_SYMBOL(perf_irq); /* * The main interrupt entry point. @@ -39,6 +42,7 @@ asmlinkage void do_entInt(unsigned long type, unsigned long vector, unsigned long la_ptr, struct pt_regs *regs) { + struct pt_regs *old_regs; switch (type) { case 0: #ifdef CONFIG_SMP @@ -51,6 +55,7 @@ do_entInt(unsigned long type, unsigned long vector, #endif break; case 1: + old_regs = set_irq_regs(regs); #ifdef CONFIG_SMP { long cpu; @@ -61,18 +66,23 @@ do_entInt(unsigned long type, unsigned long vector, if (cpu != boot_cpuid) { kstat_cpu(cpu).irqs[RTC_IRQ]++; } else { - handle_irq(RTC_IRQ, regs); + handle_irq(RTC_IRQ); } } #else - handle_irq(RTC_IRQ, regs); + handle_irq(RTC_IRQ); #endif + set_irq_regs(old_regs); return; case 2: - alpha_mv.machine_check(vector, la_ptr, regs); + old_regs = set_irq_regs(regs); + alpha_mv.machine_check(vector, la_ptr); + set_irq_regs(old_regs); return; case 3: - alpha_mv.device_interrupt(vector, regs); + old_regs = set_irq_regs(regs); + alpha_mv.device_interrupt(vector); + set_irq_regs(old_regs); return; case 4: perf_irq(la_ptr, regs); @@ -120,8 +130,7 @@ struct mcheck_info __mcheck_info; void process_mcheck_info(unsigned long vector, unsigned long la_ptr, - struct pt_regs *regs, const char *machine, - int expected) + const char *machine, int expected) { struct el_common *mchk_header; const char *reason; @@ -148,7 +157,7 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, mchk_header = (struct el_common *)la_ptr; printk(KERN_CRIT "%s machine check: vector=0x%lx pc=0x%lx code=0x%x\n", - machine, vector, regs->pc, mchk_header->code); + machine, vector, get_irq_regs()->pc, mchk_header->code); switch (mchk_header->code) { /* Machine check reasons. Defined according to PALcode sources. */ @@ -189,7 +198,7 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, printk(KERN_CRIT "machine check type: %s%s\n", reason, mchk_header->retry ? " (retryable)" : ""); - dik_show_regs(regs, NULL); + dik_show_regs(get_irq_regs(), NULL); #ifdef CONFIG_VERBOSE_MCHECK if (alpha_verbose_mcheck > 1) { diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c index ebbadbc0c36a..9405bee9894e 100644 --- a/arch/alpha/kernel/irq_i8259.c +++ b/arch/alpha/kernel/irq_i8259.c @@ -137,7 +137,7 @@ init_i8259a_irqs(void) #if defined(IACK_SC) void -isa_device_interrupt(unsigned long vector, struct pt_regs *regs) +isa_device_interrupt(unsigned long vector) { /* * Generate a PCI interrupt acknowledge cycle. The PIC will @@ -147,13 +147,13 @@ isa_device_interrupt(unsigned long vector, struct pt_regs *regs) */ int j = *(vuip) IACK_SC; j &= 0xff; - handle_irq(j, regs); + handle_irq(j); } #endif #if defined(CONFIG_ALPHA_GENERIC) || !defined(IACK_SC) void -isa_no_iack_sc_device_interrupt(unsigned long vector, struct pt_regs *regs) +isa_no_iack_sc_device_interrupt(unsigned long vector) { unsigned long pic; @@ -176,7 +176,7 @@ isa_no_iack_sc_device_interrupt(unsigned long vector, struct pt_regs *regs) while (pic) { int j = ffz(~pic); pic &= pic - 1; - handle_irq(j, regs); + handle_irq(j); } } #endif diff --git a/arch/alpha/kernel/irq_impl.h b/arch/alpha/kernel/irq_impl.h index f201d8ffc0d9..cc9a8a7aa279 100644 --- a/arch/alpha/kernel/irq_impl.h +++ b/arch/alpha/kernel/irq_impl.h @@ -15,10 +15,10 @@ #define RTC_IRQ 8 -extern void isa_device_interrupt(unsigned long, struct pt_regs *); -extern void isa_no_iack_sc_device_interrupt(unsigned long, struct pt_regs *); -extern void srm_device_interrupt(unsigned long, struct pt_regs *); -extern void pyxis_device_interrupt(unsigned long, struct pt_regs *); +extern void isa_device_interrupt(unsigned long); +extern void isa_no_iack_sc_device_interrupt(unsigned long); +extern void srm_device_interrupt(unsigned long); +extern void pyxis_device_interrupt(unsigned long); extern struct irqaction timer_irqaction; extern struct irqaction isa_cascade_irqaction; @@ -39,4 +39,4 @@ extern void i8259a_end_irq(unsigned int); extern struct hw_interrupt_type i8259a_irq_type; extern void init_i8259a_irqs(void); -extern void handle_irq(int irq, struct pt_regs * regs); +extern void handle_irq(int irq); diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c index 3b581415bab0..d53edbccbfe5 100644 --- a/arch/alpha/kernel/irq_pyxis.c +++ b/arch/alpha/kernel/irq_pyxis.c @@ -81,7 +81,7 @@ static struct hw_interrupt_type pyxis_irq_type = { }; void -pyxis_device_interrupt(unsigned long vector, struct pt_regs *regs) +pyxis_device_interrupt(unsigned long vector) { unsigned long pld; unsigned int i; @@ -98,9 +98,9 @@ pyxis_device_interrupt(unsigned long vector, struct pt_regs *regs) i = ffz(~pld); pld &= pld - 1; /* clear least bit set */ if (i == 7) - isa_device_interrupt(vector, regs); + isa_device_interrupt(vector); else - handle_irq(16+i, regs); + handle_irq(16+i); } } diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c index 8e4d121f84cc..32212014fbe9 100644 --- a/arch/alpha/kernel/irq_srm.c +++ b/arch/alpha/kernel/irq_srm.c @@ -72,8 +72,8 @@ init_srm_irqs(long max, unsigned long ignore_mask) } void -srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +srm_device_interrupt(unsigned long vector) { int irq = (vector - 0x800) >> 4; - handle_irq(irq, regs); + handle_irq(irq); } diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index fff5cf93e816..174b729c504b 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -201,6 +201,7 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } +EXPORT_SYMBOL(dma_set_mask); void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index c468e312e5f8..6e7d1fe6e935 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -300,6 +300,7 @@ pci_map_single(struct pci_dev *pdev, void *cpu_addr, size_t size, int dir) dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; return pci_map_single_1(pdev, cpu_addr, size, dac_allowed); } +EXPORT_SYMBOL(pci_map_single); dma_addr_t pci_map_page(struct pci_dev *pdev, struct page *page, unsigned long offset, @@ -314,6 +315,7 @@ pci_map_page(struct pci_dev *pdev, struct page *page, unsigned long offset, return pci_map_single_1(pdev, (char *)page_address(page) + offset, size, dac_allowed); } +EXPORT_SYMBOL(pci_map_page); /* Unmap a single streaming mode DMA translation. The DMA_ADDR and SIZE must match what was provided for in a previous pci_map_single @@ -379,6 +381,7 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, DBGA2("pci_unmap_single: sg [%lx,%lx] np %ld from %p\n", dma_addr, size, npages, __builtin_return_address(0)); } +EXPORT_SYMBOL(pci_unmap_single); void pci_unmap_page(struct pci_dev *pdev, dma_addr_t dma_addr, @@ -386,6 +389,7 @@ pci_unmap_page(struct pci_dev *pdev, dma_addr_t dma_addr, { pci_unmap_single(pdev, dma_addr, size, direction); } +EXPORT_SYMBOL(pci_unmap_page); /* Allocate and map kernel buffer using consistent mode DMA for PCI device. Returns non-NULL cpu-view pointer to the buffer if @@ -427,6 +431,7 @@ try_again: return cpu_addr; } +EXPORT_SYMBOL(pci_alloc_consistent); /* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must be values that were returned from pci_alloc_consistent. SIZE must @@ -444,7 +449,7 @@ pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu_addr, DBGA2("pci_free_consistent: [%x,%lx] from %p\n", dma_addr, size, __builtin_return_address(0)); } - +EXPORT_SYMBOL(pci_free_consistent); /* Classify the elements of the scatterlist. Write dma_address of each element with: @@ -672,6 +677,7 @@ pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, pci_unmap_sg(pdev, start, out - start, direction); return 0; } +EXPORT_SYMBOL(pci_map_sg); /* Unmap a set of streaming mode DMA translations. Again, cpu read rules concerning calls here are the same as for pci_unmap_single() @@ -752,6 +758,7 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, DBGA("pci_unmap_sg: %ld entries\n", nents - (end - sg)); } +EXPORT_SYMBOL(pci_unmap_sg); /* Return whether the given PCI device DMA address mask can be @@ -786,6 +793,7 @@ pci_dma_supported(struct pci_dev *pdev, u64 mask) return 0; } +EXPORT_SYMBOL(pci_dma_supported); /* @@ -908,6 +916,7 @@ pci_dac_dma_supported(struct pci_dev *dev, u64 mask) return ok; } +EXPORT_SYMBOL(pci_dac_dma_supported); dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, @@ -917,6 +926,7 @@ pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, + __pa(page_address(page)) + (dma64_addr_t) offset); } +EXPORT_SYMBOL(pci_dac_page_to_dma); struct page * pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) @@ -924,13 +934,14 @@ pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) unsigned long paddr = (dma_addr & PAGE_MASK) - alpha_mv.pci_dac_offset; return virt_to_page(__va(paddr)); } +EXPORT_SYMBOL(pci_dac_dma_to_page); unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) { return (dma_addr & ~PAGE_MASK); } - +EXPORT_SYMBOL(pci_dac_dma_to_offset); /* Helper for generic DMA-mapping functions. */ @@ -957,6 +968,7 @@ alpha_gendev_to_pci(struct device *dev) /* This assumes ISA bus master with dma_mask 0xffffff. */ return NULL; } +EXPORT_SYMBOL(alpha_gendev_to_pci); int dma_set_mask(struct device *dev, u64 mask) @@ -969,3 +981,4 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } +EXPORT_SYMBOL(dma_set_mask); diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index b3a8a2980365..3370e6faeae0 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -205,6 +205,7 @@ start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) regs->ps = 8; wrusp(sp); } +EXPORT_SYMBOL(start_thread); /* * Free current thread data structures etc.. @@ -376,6 +377,7 @@ dump_thread(struct pt_regs * pt, struct user * dump) dump->regs[EF_A2] = pt->r18; memcpy((char *)dump->regs + EF_SIZE, sw->fp, 32 * 8); } +EXPORT_SYMBOL(dump_thread); /* * Fill in the user structure for a ELF core dump. @@ -424,6 +426,7 @@ dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) useful value of the thread's UNIQUE field. */ dest[32] = ti->pcb.unique; } +EXPORT_SYMBOL(dump_elf_thread); int dump_elf_task(elf_greg_t *dest, struct task_struct *task) @@ -431,6 +434,7 @@ dump_elf_task(elf_greg_t *dest, struct task_struct *task) dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task)); return 1; } +EXPORT_SYMBOL(dump_elf_task); int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) @@ -439,6 +443,7 @@ dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) memcpy(dest, sw->fp, 32 * 8); return 1; } +EXPORT_SYMBOL(dump_elf_task_fp); /* * sys_execve() executes a new program. diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index 21f71287b6f5..95912ecc65e1 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -20,7 +20,7 @@ struct pci_controller; extern struct pci_ops apecs_pci_ops; extern void apecs_init_arch(void); extern void apecs_pci_clr_err(void); -extern void apecs_machine_check(u64, u64, struct pt_regs *); +extern void apecs_machine_check(u64, u64); extern void apecs_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_cia.c */ @@ -29,27 +29,27 @@ extern void cia_init_pci(void); extern void cia_init_arch(void); extern void pyxis_init_arch(void); extern void cia_kill_arch(int); -extern void cia_machine_check(u64, u64, struct pt_regs *); +extern void cia_machine_check(u64, u64); extern void cia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_irongate.c */ extern struct pci_ops irongate_pci_ops; extern int irongate_pci_clr_err(void); extern void irongate_init_arch(void); -extern void irongate_machine_check(u64, u64, struct pt_regs *); +extern void irongate_machine_check(u64, u64); #define irongate_pci_tbi ((void *)0) /* core_lca.c */ extern struct pci_ops lca_pci_ops; extern void lca_init_arch(void); -extern void lca_machine_check(u64, u64, struct pt_regs *); +extern void lca_machine_check(u64, u64); extern void lca_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_marvel.c */ extern struct pci_ops marvel_pci_ops; extern void marvel_init_arch(void); extern void marvel_kill_arch(int); -extern void marvel_machine_check(u64, u64, struct pt_regs *); +extern void marvel_machine_check(u64, u64); extern void marvel_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); extern int marvel_pa_to_nid(unsigned long); extern int marvel_cpuid_to_nid(int); @@ -64,7 +64,7 @@ void io7_clear_errors(struct io7 *io7); extern struct pci_ops mcpcia_pci_ops; extern void mcpcia_init_arch(void); extern void mcpcia_init_hoses(void); -extern void mcpcia_machine_check(u64, u64, struct pt_regs *); +extern void mcpcia_machine_check(u64, u64); extern void mcpcia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_polaris.c */ @@ -72,21 +72,21 @@ extern struct pci_ops polaris_pci_ops; extern int polaris_read_config_dword(struct pci_dev *, int, u32 *); extern int polaris_write_config_dword(struct pci_dev *, int, u32); extern void polaris_init_arch(void); -extern void polaris_machine_check(u64, u64, struct pt_regs *); +extern void polaris_machine_check(u64, u64); #define polaris_pci_tbi ((void *)0) /* core_t2.c */ extern struct pci_ops t2_pci_ops; extern void t2_init_arch(void); extern void t2_kill_arch(int); -extern void t2_machine_check(u64, u64, struct pt_regs *); +extern void t2_machine_check(u64, u64); extern void t2_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_titan.c */ extern struct pci_ops titan_pci_ops; extern void titan_init_arch(void); extern void titan_kill_arch(int); -extern void titan_machine_check(u64, u64, struct pt_regs *); +extern void titan_machine_check(u64, u64); extern void titan_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); extern struct _alpha_agp_info *titan_agp_info(void); @@ -94,14 +94,14 @@ extern struct _alpha_agp_info *titan_agp_info(void); extern struct pci_ops tsunami_pci_ops; extern void tsunami_init_arch(void); extern void tsunami_kill_arch(int); -extern void tsunami_machine_check(u64, u64, struct pt_regs *); +extern void tsunami_machine_check(u64, u64); extern void tsunami_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_wildfire.c */ extern struct pci_ops wildfire_pci_ops; extern void wildfire_init_arch(void); extern void wildfire_kill_arch(int); -extern void wildfire_machine_check(u64, u64, struct pt_regs *); +extern void wildfire_machine_check(u64, u64); extern void wildfire_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); extern int wildfire_pa_to_nid(unsigned long); extern int wildfire_cpuid_to_nid(int); @@ -133,7 +133,7 @@ extern void smp_percpu_timer_interrupt(struct pt_regs *); /* extern void reset_for_srm(void); */ /* time.c */ -extern irqreturn_t timer_interrupt(int irq, void *dev, struct pt_regs * regs); +extern irqreturn_t timer_interrupt(int irq, void *dev); extern void common_init_rtc(void); extern unsigned long est_cycle_freq; @@ -177,7 +177,7 @@ extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15); extern void die_if_kernel(char *, struct pt_regs *, long, unsigned long *); /* sys_titan.c */ -extern void titan_dispatch_irqs(u64, struct pt_regs *); +extern void titan_dispatch_irqs(u64); /* ../mm/init.c */ extern void switch_to_system_map(void); @@ -214,5 +214,4 @@ extern struct mcheck_info #endif extern void process_mcheck_info(unsigned long vector, unsigned long la_ptr, - struct pt_regs *regs, const char *machine, - int expected); + const char *machine, int expected); diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index a94e6d93e2ee..1aea7c7c683c 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -66,6 +66,7 @@ static struct notifier_block alpha_panic_block = { struct hwrpb_struct *hwrpb; +EXPORT_SYMBOL(hwrpb); unsigned long srm_hae; int alpha_l1i_cacheshape; @@ -111,6 +112,7 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE; #ifdef CONFIG_ALPHA_GENERIC struct alpha_machine_vector alpha_mv; int alpha_using_srm; +EXPORT_SYMBOL(alpha_using_srm); #endif static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long, @@ -137,6 +139,8 @@ struct screen_info screen_info = { .orig_video_points = 16 }; +EXPORT_SYMBOL(screen_info); + /* * The direct map I/O window, if any. This should be the same * for all busses, since it's used by virt_to_bus. @@ -144,6 +148,8 @@ struct screen_info screen_info = { unsigned long __direct_map_base; unsigned long __direct_map_size; +EXPORT_SYMBOL(__direct_map_base); +EXPORT_SYMBOL(__direct_map_size); /* * Declare all of the machine vectors. diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 4dc273e537fd..d1ec4f51df1a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -52,6 +52,7 @@ /* A collection of per-processor data. */ struct cpuinfo_alpha cpu_data[NR_CPUS]; +EXPORT_SYMBOL(cpu_data); /* A collection of single bit ipi messages. */ static struct { @@ -74,6 +75,7 @@ EXPORT_SYMBOL(cpu_online_map); int smp_num_probed; /* Internal processor count */ int smp_num_cpus = 1; /* Number that came online. */ +EXPORT_SYMBOL(smp_num_cpus); extern void calibrate_delay(void); @@ -515,12 +517,15 @@ smp_cpus_done(unsigned int max_cpus) void smp_percpu_timer_interrupt(struct pt_regs *regs) { + struct pt_regs *old_regs; int cpu = smp_processor_id(); unsigned long user = user_mode(regs); struct cpuinfo_alpha *data = &cpu_data[cpu]; + old_regs = set_irq_regs(regs); + /* Record kernel PC. */ - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); if (!--data->prof_counter) { /* We need to make like a normal interrupt -- otherwise @@ -534,6 +539,7 @@ smp_percpu_timer_interrupt(struct pt_regs *regs) irq_exit(); } + set_irq_regs(old_regs); } int __init @@ -786,6 +792,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry, return 0; } +EXPORT_SYMBOL(smp_call_function_on_cpu); int smp_call_function (void (*func) (void *info), void *info, int retry, int wait) @@ -793,6 +800,7 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait) return smp_call_function_on_cpu (func, info, retry, wait, cpu_online_map); } +EXPORT_SYMBOL(smp_call_function); static void ipi_imb(void *ignored) @@ -807,6 +815,7 @@ smp_imb(void) if (on_each_cpu(ipi_imb, NULL, 1, 1)) printk(KERN_CRIT "smp_imb: timed out\n"); } +EXPORT_SYMBOL(smp_imb); static void ipi_flush_tlb_all(void *ignored) @@ -862,6 +871,7 @@ flush_tlb_mm(struct mm_struct *mm) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_mm); struct flush_tlb_page_struct { struct vm_area_struct *vma; @@ -914,6 +924,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_page); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -921,6 +932,7 @@ flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long e /* On the Alpha we always flush the whole user tlb. */ flush_tlb_mm(vma->vm_mm); } +EXPORT_SYMBOL(flush_tlb_range); static void ipi_flush_icache_page(void *x) diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index d6926b7b1e99..49bedfbbd31b 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -100,7 +100,7 @@ static struct hw_interrupt_type alcor_irq_type = { }; static void -alcor_device_interrupt(unsigned long vector, struct pt_regs *regs) +alcor_device_interrupt(unsigned long vector) { unsigned long pld; unsigned int i; @@ -116,9 +116,9 @@ alcor_device_interrupt(unsigned long vector, struct pt_regs *regs) i = ffz(~pld); pld &= pld - 1; /* clear least bit set */ if (i == 31) { - isa_device_interrupt(vector, regs); + isa_device_interrupt(vector); } else { - handle_irq(16 + i, regs); + handle_irq(16 + i); } } } diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index 25a215067da8..ace475c124f6 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -82,7 +82,7 @@ static struct hw_interrupt_type cabriolet_irq_type = { }; static void -cabriolet_device_interrupt(unsigned long v, struct pt_regs *r) +cabriolet_device_interrupt(unsigned long v) { unsigned long pld; unsigned int i; @@ -98,15 +98,15 @@ cabriolet_device_interrupt(unsigned long v, struct pt_regs *r) i = ffz(~pld); pld &= pld - 1; /* clear least bit set */ if (i == 4) { - isa_device_interrupt(v, r); + isa_device_interrupt(v); } else { - handle_irq(16 + i, r); + handle_irq(16 + i); } } } static void __init -common_init_irq(void (*srm_dev_int)(unsigned long v, struct pt_regs *r)) +common_init_irq(void (*srm_dev_int)(unsigned long v)) { init_i8259a_irqs(); @@ -154,18 +154,18 @@ cabriolet_init_irq(void) too invasive though. */ static void -pc164_srm_device_interrupt(unsigned long v, struct pt_regs *r) +pc164_srm_device_interrupt(unsigned long v) { __min_ipl = getipl(); - srm_device_interrupt(v, r); + srm_device_interrupt(v); __min_ipl = 0; } static void -pc164_device_interrupt(unsigned long v, struct pt_regs *r) +pc164_device_interrupt(unsigned long v) { __min_ipl = getipl(); - cabriolet_device_interrupt(v, r); + cabriolet_device_interrupt(v); __min_ipl = 0; } diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index dd6103b867e7..85d2f933dd07 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -217,7 +217,7 @@ static struct hw_interrupt_type clipper_irq_type = { }; static void -dp264_device_interrupt(unsigned long vector, struct pt_regs * regs) +dp264_device_interrupt(unsigned long vector) { #if 1 printk("dp264_device_interrupt: NOT IMPLEMENTED YET!! \n"); @@ -236,9 +236,9 @@ dp264_device_interrupt(unsigned long vector, struct pt_regs * regs) i = ffz(~pld); pld &= pld - 1; /* clear least bit set */ if (i == 55) - isa_device_interrupt(vector, regs); + isa_device_interrupt(vector); else - handle_irq(16 + i, 16 + i, regs); + handle_irq(16 + i); #if 0 TSUNAMI_cchip->dir0.csr = 1UL << i; mb(); tmp = TSUNAMI_cchip->dir0.csr; @@ -248,7 +248,7 @@ dp264_device_interrupt(unsigned long vector, struct pt_regs * regs) } static void -dp264_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +dp264_srm_device_interrupt(unsigned long vector) { int irq; @@ -268,11 +268,11 @@ dp264_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) if (irq >= 32) irq -= 16; - handle_irq(irq, regs); + handle_irq(irq); } static void -clipper_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +clipper_srm_device_interrupt(unsigned long vector) { int irq; @@ -290,7 +290,7 @@ clipper_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) * * Eg IRQ 24 is DRIR bit 8, etc, etc */ - handle_irq(irq, regs); + handle_irq(irq); } static void __init diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index ed108b66ec09..9c5a306dc0ee 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -80,7 +80,7 @@ static struct hw_interrupt_type eb64p_irq_type = { }; static void -eb64p_device_interrupt(unsigned long vector, struct pt_regs *regs) +eb64p_device_interrupt(unsigned long vector) { unsigned long pld; unsigned int i; @@ -97,9 +97,9 @@ eb64p_device_interrupt(unsigned long vector, struct pt_regs *regs) pld &= pld - 1; /* clear least bit set */ if (i == 5) { - isa_device_interrupt(vector, regs); + isa_device_interrupt(vector); } else { - handle_irq(16 + i, regs); + handle_irq(16 + i); } } } diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index 64a785baf53a..7ef3b6fb3700 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -91,7 +91,7 @@ static struct hw_interrupt_type eiger_irq_type = { }; static void -eiger_device_interrupt(unsigned long vector, struct pt_regs * regs) +eiger_device_interrupt(unsigned long vector) { unsigned intstatus; @@ -118,20 +118,20 @@ eiger_device_interrupt(unsigned long vector, struct pt_regs * regs) * despatch an interrupt if it's set. */ - if (intstatus & 8) handle_irq(16+3, regs); - if (intstatus & 4) handle_irq(16+2, regs); - if (intstatus & 2) handle_irq(16+1, regs); - if (intstatus & 1) handle_irq(16+0, regs); + if (intstatus & 8) handle_irq(16+3); + if (intstatus & 4) handle_irq(16+2); + if (intstatus & 2) handle_irq(16+1); + if (intstatus & 1) handle_irq(16+0); } else { - isa_device_interrupt(vector, regs); + isa_device_interrupt(vector); } } static void -eiger_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +eiger_srm_device_interrupt(unsigned long vector) { int irq = (vector - 0x800) >> 4; - handle_irq(irq, regs); + handle_irq(irq); } static void __init diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 4ac2b328b8de..2c3de97de46c 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -129,7 +129,7 @@ static struct hw_interrupt_type jensen_local_irq_type = { }; static void -jensen_device_interrupt(unsigned long vector, struct pt_regs * regs) +jensen_device_interrupt(unsigned long vector) { int irq; @@ -189,7 +189,7 @@ jensen_device_interrupt(unsigned long vector, struct pt_regs * regs) if (cc - last_msg > ((JENSEN_CYCLES_PER_SEC) * 3) || irq != last_irq) { printk(KERN_CRIT " irq %d count %d cc %u @ %lx\n", - irq, count, cc-last_cc, regs->pc); + irq, count, cc-last_cc, get_irq_regs()->pc); count = 0; last_msg = cc; last_irq = irq; @@ -198,7 +198,7 @@ jensen_device_interrupt(unsigned long vector, struct pt_regs * regs) } #endif - handle_irq(irq, regs); + handle_irq(irq); } static void __init @@ -244,7 +244,7 @@ jensen_init_arch(void) } static void -jensen_machine_check (u64 vector, u64 la, struct pt_regs *regs) +jensen_machine_check (u64 vector, u64 la) { printk(KERN_CRIT "Machine check\n"); } diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 36d215954376..e349f03b830e 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -38,7 +38,7 @@ * Interrupt handling. */ static void -io7_device_interrupt(unsigned long vector, struct pt_regs * regs) +io7_device_interrupt(unsigned long vector) { unsigned int pid; unsigned int irq; @@ -64,7 +64,7 @@ io7_device_interrupt(unsigned long vector, struct pt_regs * regs) irq &= MARVEL_IRQ_VEC_IRQ_MASK; /* not too many bits */ irq |= pid << MARVEL_IRQ_VEC_PE_SHIFT; /* merge the pid */ - handle_irq(irq, regs); + handle_irq(irq); } static volatile unsigned long * diff --git a/arch/alpha/kernel/sys_miata.c b/arch/alpha/kernel/sys_miata.c index 61ac56f8eeea..b8b817feb1ee 100644 --- a/arch/alpha/kernel/sys_miata.c +++ b/arch/alpha/kernel/sys_miata.c @@ -33,7 +33,7 @@ static void -miata_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +miata_srm_device_interrupt(unsigned long vector) { int irq; @@ -56,7 +56,7 @@ miata_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) if (irq >= 16) irq = irq + 8; - handle_irq(irq, regs); + handle_irq(irq); } static void __init diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index cc4c58111366..8d3e9429c5ee 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -79,7 +79,7 @@ static struct hw_interrupt_type mikasa_irq_type = { }; static void -mikasa_device_interrupt(unsigned long vector, struct pt_regs *regs) +mikasa_device_interrupt(unsigned long vector) { unsigned long pld; unsigned int i; @@ -97,9 +97,9 @@ mikasa_device_interrupt(unsigned long vector, struct pt_regs *regs) i = ffz(~pld); pld &= pld - 1; /* clear least bit set */ if (i < 16) { - isa_device_interrupt(vector, regs); + isa_device_interrupt(vector); } else { - handle_irq(i, regs); + handle_irq(i); } } } @@ -182,8 +182,7 @@ mikasa_map_irq(struct pci_dev *dev, u8 slot, u8 pin) #if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) static void -mikasa_apecs_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +mikasa_apecs_machine_check(unsigned long vector, unsigned long la_ptr) { #define MCHK_NO_DEVSEL 0x205U #define MCHK_NO_TABT 0x204U @@ -202,7 +201,7 @@ mikasa_apecs_machine_check(unsigned long vector, unsigned long la_ptr, mb(); code = mchk_header->code; - process_mcheck_info(vector, la_ptr, regs, "MIKASA APECS", + process_mcheck_info(vector, la_ptr, "MIKASA APECS", (mcheck_expected(0) && (code == MCHK_NO_DEVSEL || code == MCHK_NO_TABT))); diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index c0d696efec5b..93744bab73fb 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -124,8 +124,7 @@ naut_sys_machine_check(unsigned long vector, unsigned long la_ptr, in the system. They are analysed separately but all starts here. */ void -nautilus_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs *regs) +nautilus_machine_check(unsigned long vector, unsigned long la_ptr) { char *mchk_class; @@ -165,7 +164,7 @@ nautilus_machine_check(unsigned long vector, unsigned long la_ptr, else if (vector == SCB_Q_SYSMCHK) mchk_class = "Fatal"; else { - ev6_machine_check(vector, la_ptr, regs); + ev6_machine_check(vector, la_ptr); return; } @@ -173,7 +172,7 @@ nautilus_machine_check(unsigned long vector, unsigned long la_ptr, "[%s System Machine Check (NMI)]\n", vector, mchk_class); - naut_sys_machine_check(vector, la_ptr, regs); + naut_sys_machine_check(vector, la_ptr, get_irq_regs()); /* Tell the PALcode to clear the machine check */ draina(); diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 2d3cff7e8c5f..de6ba3432e8a 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -77,7 +77,7 @@ static struct hw_interrupt_type noritake_irq_type = { }; static void -noritake_device_interrupt(unsigned long vector, struct pt_regs *regs) +noritake_device_interrupt(unsigned long vector) { unsigned long pld; unsigned int i; @@ -96,15 +96,15 @@ noritake_device_interrupt(unsigned long vector, struct pt_regs *regs) i = ffz(~pld); pld &= pld - 1; /* clear least bit set */ if (i < 16) { - isa_device_interrupt(vector, regs); + isa_device_interrupt(vector); } else { - handle_irq(i, regs); + handle_irq(i); } } } static void -noritake_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +noritake_srm_device_interrupt(unsigned long vector) { int irq; @@ -122,7 +122,7 @@ noritake_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) if (irq >= 16) irq = irq + 1; - handle_irq(irq, regs); + handle_irq(irq); } static void __init @@ -264,8 +264,7 @@ noritake_swizzle(struct pci_dev *dev, u8 *pinp) #if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) static void -noritake_apecs_machine_check(unsigned long vector, unsigned long la_ptr, - struct pt_regs * regs) +noritake_apecs_machine_check(unsigned long vector, unsigned long la_ptr) { #define MCHK_NO_DEVSEL 0x205U #define MCHK_NO_TABT 0x204U @@ -284,7 +283,7 @@ noritake_apecs_machine_check(unsigned long vector, unsigned long la_ptr, mb(); code = mchk_header->code; - process_mcheck_info(vector, la_ptr, regs, "NORITAKE APECS", + process_mcheck_info(vector, la_ptr, "NORITAKE APECS", (mcheck_expected(0) && (code == MCHK_NO_DEVSEL || code == MCHK_NO_TABT))); diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index 949607e3d6fb..581d08c70b92 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -134,7 +134,7 @@ static struct hw_interrupt_type rawhide_irq_type = { }; static void -rawhide_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +rawhide_srm_device_interrupt(unsigned long vector) { int irq; @@ -158,7 +158,7 @@ rawhide_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) /* Adjust by which hose it is from. */ irq -= ((irq + 16) >> 2) & 0x38; - handle_irq(irq, regs); + handle_irq(irq); } static void __init diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index 6ae506052635..ce1faa6f1df1 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -83,7 +83,7 @@ static struct hw_interrupt_type rx164_irq_type = { }; static void -rx164_device_interrupt(unsigned long vector, struct pt_regs *regs) +rx164_device_interrupt(unsigned long vector) { unsigned long pld; volatile unsigned int *dirr; @@ -102,9 +102,9 @@ rx164_device_interrupt(unsigned long vector, struct pt_regs *regs) i = ffz(~pld); pld &= pld - 1; /* clear least bit set */ if (i == 20) { - isa_no_iack_sc_device_interrupt(vector, regs); + isa_no_iack_sc_device_interrupt(vector); } else { - handle_irq(16+i, regs); + handle_irq(16+i); } } } diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index a7a14647b50e..906019cfa681 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -512,7 +512,7 @@ static struct hw_interrupt_type sable_lynx_irq_type = { }; static void -sable_lynx_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +sable_lynx_srm_device_interrupt(unsigned long vector) { /* Note that the vector reported by the SRM PALcode corresponds to the interrupt mask bits, but we have to manage via the @@ -526,7 +526,7 @@ sable_lynx_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) printk("%s: vector 0x%lx bit 0x%x irq 0x%x\n", __FUNCTION__, vector, bit, irq); #endif - handle_irq(irq, regs); + handle_irq(irq); } static void __init diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index 2c75cd1fd81a..9bd9a31450c6 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -85,7 +85,7 @@ static struct hw_interrupt_type takara_irq_type = { }; static void -takara_device_interrupt(unsigned long vector, struct pt_regs *regs) +takara_device_interrupt(unsigned long vector) { unsigned intstatus; @@ -112,20 +112,20 @@ takara_device_interrupt(unsigned long vector, struct pt_regs *regs) * despatch an interrupt if it's set. */ - if (intstatus & 8) handle_irq(16+3, regs); - if (intstatus & 4) handle_irq(16+2, regs); - if (intstatus & 2) handle_irq(16+1, regs); - if (intstatus & 1) handle_irq(16+0, regs); + if (intstatus & 8) handle_irq(16+3); + if (intstatus & 4) handle_irq(16+2); + if (intstatus & 2) handle_irq(16+1); + if (intstatus & 1) handle_irq(16+0); } else { - isa_device_interrupt (vector, regs); + isa_device_interrupt (vector); } } static void -takara_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +takara_srm_device_interrupt(unsigned long vector) { int irq = (vector - 0x800) >> 4; - handle_irq(irq, regs); + handle_irq(irq); } static void __init diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 302aab38d95f..29ab7db81c30 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -167,18 +167,18 @@ titan_set_irq_affinity(unsigned int irq, cpumask_t affinity) } static void -titan_device_interrupt(unsigned long vector, struct pt_regs * regs) +titan_device_interrupt(unsigned long vector) { printk("titan_device_interrupt: NOT IMPLEMENTED YET!! \n"); } static void -titan_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +titan_srm_device_interrupt(unsigned long vector) { int irq; irq = (vector - 0x800) >> 4; - handle_irq(irq, regs); + handle_irq(irq); } @@ -204,7 +204,7 @@ static struct hw_interrupt_type titan_irq_type = { }; static irqreturn_t -titan_intr_nop(int irq, void *dev_id, struct pt_regs *regs) +titan_intr_nop(int irq, void *dev_id) { /* * This is a NOP interrupt handler for the purposes of @@ -243,7 +243,7 @@ titan_legacy_init_irq(void) } void -titan_dispatch_irqs(u64 mask, struct pt_regs *regs) +titan_dispatch_irqs(u64 mask) { unsigned long vector; @@ -263,7 +263,7 @@ titan_dispatch_irqs(u64 mask, struct pt_regs *regs) vector = 0x900 + (vector << 4); /* convert to SRM vector */ /* dispatch it */ - alpha_mv.device_interrupt(vector, regs); + alpha_mv.device_interrupt(vector); } } diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index 22c5798fe083..42c3eede4d09 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -234,7 +234,7 @@ wildfire_init_irq(void) } static void -wildfire_device_interrupt(unsigned long vector, struct pt_regs * regs) +wildfire_device_interrupt(unsigned long vector) { int irq; @@ -246,7 +246,7 @@ wildfire_device_interrupt(unsigned long vector, struct pt_regs * regs) * bits 5-0: irq in PCA */ - handle_irq(irq, regs); + handle_irq(irq); return; } diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 581ddcc22fc5..d7053eb4ffcf 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -57,6 +57,7 @@ static int set_rtc_mmss(unsigned long); DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); #define TICK_SIZE (tick_nsec / 1000) @@ -104,7 +105,7 @@ unsigned long long sched_clock(void) * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -irqreturn_t timer_interrupt(int irq, void *dev, struct pt_regs * regs) +irqreturn_t timer_interrupt(int irq, void *dev) { unsigned long delta; __u32 now; @@ -112,7 +113,7 @@ irqreturn_t timer_interrupt(int irq, void *dev, struct pt_regs * regs) #ifndef CONFIG_SMP /* Not SMP, do kernel PC profiling here. */ - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); #endif write_seqlock(&xtime_lock); @@ -132,7 +133,7 @@ irqreturn_t timer_interrupt(int irq, void *dev, struct pt_regs * regs) while (nticks > 0) { do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif nticks--; } diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index b826f58c6e72..e3e3806a6f25 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -13,12 +13,14 @@ #include <linux/swap.h> #include <linux/initrd.h> #include <linux/pfn.h> +#include <linux/module.h> #include <asm/hwrpb.h> #include <asm/pgalloc.h> pg_data_t node_data[MAX_NUMNODES]; bootmem_data_t node_bdata[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); #undef DEBUG_DISCONTIG #ifdef DEBUG_DISCONTIG diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 181ef1ead5b8..80a72c75214f 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -163,8 +163,7 @@ static struct locomo_dev_info locomo_devices[] = { #define LOCOMO_IRQ_LT_START (IRQ_LOCOMO_LT) #define LOCOMO_IRQ_SPI_START (IRQ_LOCOMO_SPI_RFR) -static void locomo_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void locomo_handler(unsigned int irq, struct irqdesc *desc) { int req, i; struct irqdesc *d; @@ -182,7 +181,7 @@ static void locomo_handler(unsigned int irq, struct irqdesc *desc, d = irq_desc + irq; for (i = 0; i <= 3; i++, d++, irq++) { if (req & (0x0100 << i)) { - desc_handle_irq(irq, d, regs); + desc_handle_irq(irq, d); } } @@ -218,15 +217,14 @@ static struct irq_chip locomo_chip = { .unmask = locomo_unmask_irq, }; -static void locomo_key_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void locomo_key_handler(unsigned int irq, struct irqdesc *desc) { struct irqdesc *d; void __iomem *mapbase = get_irq_chipdata(irq); if (locomo_readl(mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC) & 0x0001) { d = irq_desc + LOCOMO_IRQ_KEY_START; - desc_handle_irq(LOCOMO_IRQ_KEY_START, d, regs); + desc_handle_irq(LOCOMO_IRQ_KEY_START, d); } } @@ -264,8 +262,7 @@ static struct irq_chip locomo_key_chip = { .unmask = locomo_key_unmask_irq, }; -static void locomo_gpio_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void locomo_gpio_handler(unsigned int irq, struct irqdesc *desc) { int req, i; struct irqdesc *d; @@ -280,7 +277,7 @@ static void locomo_gpio_handler(unsigned int irq, struct irqdesc *desc, d = irq_desc + LOCOMO_IRQ_GPIO_START; for (i = 0; i <= 15; i++, irq++, d++) { if (req & (0x0001 << i)) { - desc_handle_irq(irq, d, regs); + desc_handle_irq(irq, d); } } } @@ -328,15 +325,14 @@ static struct irq_chip locomo_gpio_chip = { .unmask = locomo_gpio_unmask_irq, }; -static void locomo_lt_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void locomo_lt_handler(unsigned int irq, struct irqdesc *desc) { struct irqdesc *d; void __iomem *mapbase = get_irq_chipdata(irq); if (locomo_readl(mapbase + LOCOMO_LTINT) & 0x0001) { d = irq_desc + LOCOMO_IRQ_LT_START; - desc_handle_irq(LOCOMO_IRQ_LT_START, d, regs); + desc_handle_irq(LOCOMO_IRQ_LT_START, d); } } @@ -374,8 +370,7 @@ static struct irq_chip locomo_lt_chip = { .unmask = locomo_lt_unmask_irq, }; -static void locomo_spi_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void locomo_spi_handler(unsigned int irq, struct irqdesc *desc) { int req, i; struct irqdesc *d; @@ -388,7 +383,7 @@ static void locomo_spi_handler(unsigned int irq, struct irqdesc *desc, for (i = 0; i <= 3; i++, irq++, d++) { if (req & (0x0001 << i)) { - desc_handle_irq(irq, d, regs); + desc_handle_irq(irq, d); } } } diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 30046ad41ced..d5f72010a6f3 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -147,7 +147,7 @@ void __init sa1111_adjust_zones(int node, unsigned long *size, unsigned long *ho * will call us again if there are more interrupts to process. */ static void -sa1111_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +sa1111_irq_handler(unsigned int irq, struct irqdesc *desc) { unsigned int stat0, stat1, i; void __iomem *base = get_irq_data(irq); @@ -162,17 +162,17 @@ sa1111_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) sa1111_writel(stat1, base + SA1111_INTSTATCLR1); if (stat0 == 0 && stat1 == 0) { - do_bad_IRQ(irq, desc, regs); + do_bad_IRQ(irq, desc); return; } for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1) if (stat0 & 1) - handle_edge_irq(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i); for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1) if (stat1 & 1) - handle_edge_irq(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i); /* For level-based interrupts */ desc->chip->unmask(irq); diff --git a/arch/arm/common/sharpsl_pm.c b/arch/arm/common/sharpsl_pm.c index f412dedda684..605dedf96790 100644 --- a/arch/arm/common/sharpsl_pm.c +++ b/arch/arm/common/sharpsl_pm.c @@ -258,7 +258,7 @@ static void sharpsl_ac_timer(unsigned long data) } -irqreturn_t sharpsl_ac_isr(int irq, void *dev_id, struct pt_regs *fp) +irqreturn_t sharpsl_ac_isr(int irq, void *dev_id) { /* Delay the event slightly to debounce */ /* Must be a smaller delay than the chrg_full_isr below */ @@ -293,7 +293,7 @@ static void sharpsl_chrg_full_timer(unsigned long data) /* Charging Finished Interrupt (Not present on Corgi) */ /* Can trigger at the same time as an AC staus change so delay until after that has been processed */ -irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id, struct pt_regs *fp) +irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id) { if (sharpsl_pm.flags & SHARPSL_SUSPENDED) return IRQ_HANDLED; @@ -304,7 +304,7 @@ irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id, struct pt_regs *fp) return IRQ_HANDLED; } -irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id, struct pt_regs *fp) +irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id) { int is_fatal = 0; diff --git a/arch/arm/common/time-acorn.c b/arch/arm/common/time-acorn.c index 3f60dd9aca80..34038eccbba9 100644 --- a/arch/arm/common/time-acorn.c +++ b/arch/arm/common/time-acorn.c @@ -67,10 +67,10 @@ void __init ioctime_init(void) } static irqreturn_t -ioc_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +ioc_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index da69e660574b..4779f474f911 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -178,9 +178,3 @@ EXPORT_SYMBOL(_find_next_zero_bit_be); EXPORT_SYMBOL(_find_first_bit_be); EXPORT_SYMBOL(_find_next_bit_be); #endif - - /* syscalls */ -EXPORT_SYMBOL(sys_write); -EXPORT_SYMBOL(sys_lseek); -EXPORT_SYMBOL(sys_exit); -EXPORT_SYMBOL(sys_wait4); diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c index 3e14b1348c0b..b27513a0f11e 100644 --- a/arch/arm/kernel/ecard.c +++ b/arch/arm/kernel/ecard.c @@ -567,7 +567,7 @@ static void ecard_check_lockup(struct irqdesc *desc) } static void -ecard_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +ecard_irq_handler(unsigned int irq, struct irqdesc *desc) { ecard_t *ec; int called = 0; @@ -586,7 +586,7 @@ ecard_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) if (pending) { struct irqdesc *d = irq_desc + ec->irq; - desc_handle_irq(ec->irq, d, regs); + desc_handle_irq(ec->irq, d); called ++; } } @@ -609,7 +609,7 @@ static unsigned char first_set[] = }; static void -ecard_irqexp_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +ecard_irqexp_handler(unsigned int irq, struct irqdesc *desc) { const unsigned int statusmask = 15; unsigned int status; @@ -633,7 +633,7 @@ ecard_irqexp_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *reg * Serial cards should go in 0/1, ethernet/scsi in 2/3 * otherwise you will lose serial data at high speeds! */ - desc_handle_irq(ec->irq, d, regs); + desc_handle_irq(ec->irq, d); } else { printk(KERN_WARNING "card%d: interrupt from unclaimed " "card???\n", slot); diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 2e1bf830fe11..2c4ff1cbe334 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -111,6 +111,7 @@ static struct irq_desc bad_irq_desc = { */ asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { + struct pt_regs *old_regs = set_irq_regs(regs); struct irqdesc *desc = irq_desc + irq; /* @@ -122,12 +123,13 @@ asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) irq_enter(); - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); /* AT91 specific workaround */ irq_finish(irq); irq_exit(); + set_irq_regs(old_regs); } void set_irq_flags(unsigned int irq, unsigned int iflags) diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index b030320b17c7..c03cab5c4c79 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -27,6 +27,7 @@ #include <linux/profile.h> #include <linux/sysdev.h> #include <linux/timer.h> +#include <linux/irq.h> #include <asm/leds.h> #include <asm/thread_info.h> @@ -324,9 +325,10 @@ EXPORT_SYMBOL(restore_time_delta); /* * Kernel system timer support. */ -void timer_tick(struct pt_regs *regs) +void timer_tick(void) { - profile_tick(CPU_PROFILING, regs); + struct pt_regs *regs = get_irq_regs(); + profile_tick(CPU_PROFILING); do_leds(); do_set_rtc(); do_timer(1); diff --git a/arch/arm/mach-aaec2000/core.c b/arch/arm/mach-aaec2000/core.c index baa997c857dc..fe3d297d682d 100644 --- a/arch/arm/mach-aaec2000/core.c +++ b/arch/arm/mach-aaec2000/core.c @@ -127,12 +127,12 @@ static unsigned long aaec2000_gettimeoffset(void) /* We enter here with IRQs enabled */ static irqreturn_t -aaec2000_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +aaec2000_timer_interrupt(int irq, void *dev_id) { /* TODO: Check timer accuracy */ write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); TIMER1_CLEAR = 1; write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-at91rm9200/at91rm9200_time.c b/arch/arm/mach-at91rm9200/at91rm9200_time.c index a92a8622c78a..07c9cea8961d 100644 --- a/arch/arm/mach-at91rm9200/at91rm9200_time.c +++ b/arch/arm/mach-at91rm9200/at91rm9200_time.c @@ -65,13 +65,13 @@ static unsigned long at91rm9200_gettimeoffset(void) /* * IRQ handler for the timer. */ -static irqreturn_t at91rm9200_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t at91rm9200_timer_interrupt(int irq, void *dev_id) { if (at91_sys_read(AT91_ST_SR) & AT91_ST_PITS) { /* This is a shared interrupt */ write_seqlock(&xtime_lock); while (((read_CRTR() - last_crtr) & AT91_ST_ALMV) >= LATCH) { - timer_tick(regs); + timer_tick(); last_crtr = (last_crtr + LATCH) & AT91_ST_ALMV; } diff --git a/arch/arm/mach-at91rm9200/gpio.c b/arch/arm/mach-at91rm9200/gpio.c index 58c9bf5e9520..7467d644f0a3 100644 --- a/arch/arm/mach-at91rm9200/gpio.c +++ b/arch/arm/mach-at91rm9200/gpio.c @@ -332,7 +332,7 @@ static struct irq_chip gpio_irqchip = { .set_wake = gpio_irq_set_wake, }; -static void gpio_irq_handler(unsigned irq, struct irqdesc *desc, struct pt_regs *regs) +static void gpio_irq_handler(unsigned irq, struct irqdesc *desc) { unsigned pin; struct irqdesc *gpio; @@ -363,7 +363,7 @@ static void gpio_irq_handler(unsigned irq, struct irqdesc *desc, struct pt_regs gpio_irq_mask(pin); } else - desc_handle_irq(pin, gpio, regs); + desc_handle_irq(pin, gpio); } pin++; gpio++; diff --git a/arch/arm/mach-clps711x/time.c b/arch/arm/mach-clps711x/time.c index a071eac4a30a..428493dd4687 100644 --- a/arch/arm/mach-clps711x/time.c +++ b/arch/arm/mach-clps711x/time.c @@ -48,10 +48,10 @@ static unsigned long clps711x_gettimeoffset(void) * IRQ handler for the timer */ static irqreturn_t -p720t_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +p720t_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/arm/mach-clps7500/core.c b/arch/arm/mach-clps7500/core.c index 92eaebdd5606..fb10cf252588 100644 --- a/arch/arm/mach-clps7500/core.c +++ b/arch/arm/mach-clps7500/core.c @@ -292,11 +292,11 @@ extern void ioctime_init(void); extern unsigned long ioc_timer_gettimeoffset(void); static irqreturn_t -clps7500_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +clps7500_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); /* Why not using do_leds interface?? */ { diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c index 70dd12ef3c40..90103ab373a6 100644 --- a/arch/arm/mach-ebsa110/core.c +++ b/arch/arm/mach-ebsa110/core.c @@ -174,7 +174,7 @@ static unsigned long ebsa110_gettimeoffset(void) } static irqreturn_t -ebsa110_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +ebsa110_timer_interrupt(int irq, void *dev_id) { u32 count; @@ -190,7 +190,7 @@ ebsa110_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) __raw_writeb(count & 0xff, PIT_T1); __raw_writeb(count >> 8, PIT_T1); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index a87a784b9201..e3fd1ab6adcc 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -97,7 +97,7 @@ static unsigned int last_jiffy_time; #define TIMER4_TICKS_PER_JIFFY ((CLOCK_TICK_RATE + (HZ/2)) / HZ) -static int ep93xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static int ep93xx_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); @@ -106,7 +106,7 @@ static int ep93xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) (__raw_readl(EP93XX_TIMER4_VALUE_LOW) - last_jiffy_time) >= TIMER4_TICKS_PER_JIFFY) { last_jiffy_time += TIMER4_TICKS_PER_JIFFY; - timer_tick(regs); + timer_tick(); } write_sequnlock(&xtime_lock); @@ -245,7 +245,7 @@ EXPORT_SYMBOL(gpio_line_set); * EP93xx IRQ handling *************************************************************************/ static void ep93xx_gpio_ab_irq_handler(unsigned int irq, - struct irqdesc *desc, struct pt_regs *regs) + struct irqdesc *desc) { unsigned char status; int i; @@ -254,7 +254,7 @@ static void ep93xx_gpio_ab_irq_handler(unsigned int irq, for (i = 0; i < 8; i++) { if (status & (1 << i)) { desc = irq_desc + IRQ_EP93XX_GPIO(0) + i; - desc_handle_irq(IRQ_EP93XX_GPIO(0) + i, desc, regs); + desc_handle_irq(IRQ_EP93XX_GPIO(0) + i, desc); } } @@ -262,7 +262,7 @@ static void ep93xx_gpio_ab_irq_handler(unsigned int irq, for (i = 0; i < 8; i++) { if (status & (1 << i)) { desc = irq_desc + IRQ_EP93XX_GPIO(8) + i; - desc_handle_irq(IRQ_EP93XX_GPIO(8) + i, desc, regs); + desc_handle_irq(IRQ_EP93XX_GPIO(8) + i, desc); } } } diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c index 2af610811ca4..fa6be870c6c2 100644 --- a/arch/arm/mach-footbridge/dc21285-timer.c +++ b/arch/arm/mach-footbridge/dc21285-timer.c @@ -28,13 +28,13 @@ static unsigned long timer1_gettimeoffset (void) } static irqreturn_t -timer1_interrupt(int irq, void *dev_id, struct pt_regs *regs) +timer1_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); *CSR_TIMER1_CLR = 0; - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index a1ae49df5c3b..fa5d4976f514 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -154,7 +154,7 @@ static void dc21285_enable_error(unsigned long __data) /* * Warn on PCI errors. */ -static irqreturn_t dc21285_abort_irq(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dc21285_abort_irq(int irq, void *dev_id) { unsigned int cmd; unsigned int status; @@ -165,7 +165,7 @@ static irqreturn_t dc21285_abort_irq(int irq, void *dev_id, struct pt_regs *regs if (status & PCI_STATUS_REC_MASTER_ABORT) { printk(KERN_DEBUG "PCI: master abort, pc=0x%08lx\n", - instruction_pointer(regs)); + instruction_pointer(get_irq_regs())); cmd |= PCI_STATUS_REC_MASTER_ABORT << 16; } @@ -184,7 +184,7 @@ static irqreturn_t dc21285_abort_irq(int irq, void *dev_id, struct pt_regs *regs return IRQ_HANDLED; } -static irqreturn_t dc21285_serr_irq(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dc21285_serr_irq(int irq, void *dev_id) { struct timer_list *timer = dev_id; unsigned int cntl; @@ -206,7 +206,7 @@ static irqreturn_t dc21285_serr_irq(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -static irqreturn_t dc21285_discard_irq(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dc21285_discard_irq(int irq, void *dev_id) { printk(KERN_DEBUG "PCI: discard timer expired\n"); *CSR_SA110_CNTL &= 0xffffde07; @@ -214,7 +214,7 @@ static irqreturn_t dc21285_discard_irq(int irq, void *dev_id, struct pt_regs *re return IRQ_HANDLED; } -static irqreturn_t dc21285_dparity_irq(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dc21285_dparity_irq(int irq, void *dev_id) { unsigned int cmd; @@ -228,7 +228,7 @@ static irqreturn_t dc21285_dparity_irq(int irq, void *dev_id, struct pt_regs *re return IRQ_HANDLED; } -static irqreturn_t dc21285_parity_irq(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dc21285_parity_irq(int irq, void *dev_id) { struct timer_list *timer = dev_id; unsigned int cmd; diff --git a/arch/arm/mach-footbridge/isa-irq.c b/arch/arm/mach-footbridge/isa-irq.c index 87448c2d6baa..888dedd501b9 100644 --- a/arch/arm/mach-footbridge/isa-irq.c +++ b/arch/arm/mach-footbridge/isa-irq.c @@ -85,17 +85,17 @@ static struct irqchip isa_hi_chip = { }; static void -isa_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +isa_irq_handler(unsigned int irq, struct irqdesc *desc) { unsigned int isa_irq = *(unsigned char *)PCIIACK_BASE; if (isa_irq < _ISA_IRQ(0) || isa_irq >= _ISA_IRQ(16)) { - do_bad_IRQ(isa_irq, desc, regs); + do_bad_IRQ(isa_irq, desc); return; } desc = irq_desc + isa_irq; - desc_handle_irq(isa_irq, desc, regs); + desc_handle_irq(isa_irq, desc); } static struct irqaction irq_cascade = { diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c index c4810a40c8e1..d884a3954fb4 100644 --- a/arch/arm/mach-footbridge/isa-timer.c +++ b/arch/arm/mach-footbridge/isa-timer.c @@ -62,10 +62,10 @@ static unsigned long isa_gettimeoffset(void) } static irqreturn_t -isa_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +isa_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/arm/mach-h720x/common.c b/arch/arm/mach-h720x/common.c index c096b4569308..4719229a1a78 100644 --- a/arch/arm/mach-h720x/common.c +++ b/arch/arm/mach-h720x/common.c @@ -101,14 +101,14 @@ static void inline unmask_gpio_irq(u32 irq) static void h720x_gpio_handler(unsigned int mask, unsigned int irq, - struct irqdesc *desc, struct pt_regs *regs) + struct irqdesc *desc) { IRQDBG("%s irq: %d\n",__FUNCTION__,irq); desc = irq_desc + irq; while (mask) { if (mask & 1) { IRQDBG("handling irq %d\n", irq); - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); } irq++; desc++; @@ -117,63 +117,58 @@ h720x_gpio_handler(unsigned int mask, unsigned int irq, } static void -h720x_gpioa_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +h720x_gpioa_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = CPU_REG(GPIO_A_VIRT,GPIO_STAT); irq = IRQ_CHAINED_GPIOA(0); IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); - h720x_gpio_handler(mask, irq, desc, regs); + h720x_gpio_handler(mask, irq, desc); } static void -h720x_gpiob_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +h720x_gpiob_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = CPU_REG(GPIO_B_VIRT,GPIO_STAT); irq = IRQ_CHAINED_GPIOB(0); IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); - h720x_gpio_handler(mask, irq, desc, regs); + h720x_gpio_handler(mask, irq, desc); } static void -h720x_gpioc_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +h720x_gpioc_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = CPU_REG(GPIO_C_VIRT,GPIO_STAT); irq = IRQ_CHAINED_GPIOC(0); IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); - h720x_gpio_handler(mask, irq, desc, regs); + h720x_gpio_handler(mask, irq, desc); } static void -h720x_gpiod_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +h720x_gpiod_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = CPU_REG(GPIO_D_VIRT,GPIO_STAT); irq = IRQ_CHAINED_GPIOD(0); IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); - h720x_gpio_handler(mask, irq, desc, regs); + h720x_gpio_handler(mask, irq, desc); } #ifdef CONFIG_CPU_H7202 static void -h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = CPU_REG(GPIO_E_VIRT,GPIO_STAT); irq = IRQ_CHAINED_GPIOE(0); IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); - h720x_gpio_handler(mask, irq, desc, regs); + h720x_gpio_handler(mask, irq, desc); } #endif diff --git a/arch/arm/mach-h720x/cpu-h7201.c b/arch/arm/mach-h720x/cpu-h7201.c index a9a8255a3a03..13f76bdb3d9d 100644 --- a/arch/arm/mach-h720x/cpu-h7201.c +++ b/arch/arm/mach-h720x/cpu-h7201.c @@ -27,12 +27,12 @@ * Timer interrupt handler */ static irqreturn_t -h7201_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +h7201_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); CPU_REG (TIMER_VIRT, TIMER_TOPSTAT); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-h720x/cpu-h7202.c b/arch/arm/mach-h720x/cpu-h7202.c index da678d163fd9..06fecaefd8dc 100644 --- a/arch/arm/mach-h720x/cpu-h7202.c +++ b/arch/arm/mach-h720x/cpu-h7202.c @@ -106,8 +106,7 @@ static struct platform_device *devices[] __initdata = { * we have to handle all timer interrupts in one place. */ static void -h7202_timerx_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +h7202_timerx_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; @@ -115,7 +114,7 @@ h7202_timerx_demux_handler(unsigned int irq_unused, struct irqdesc *desc, if ( mask & TSTAT_T0INT ) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); if( mask == TSTAT_T0INT ) return; @@ -126,7 +125,7 @@ h7202_timerx_demux_handler(unsigned int irq_unused, struct irqdesc *desc, desc = irq_desc + irq; while (mask) { if (mask & 1) - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); irq++; desc++; mask >>= 1; @@ -137,9 +136,9 @@ h7202_timerx_demux_handler(unsigned int irq_unused, struct irqdesc *desc, * Timer interrupt handler */ static irqreturn_t -h7202_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +h7202_timer_interrupt(int irq, void *dev_id) { - h7202_timerx_demux_handler(0, NULL, regs); + h7202_timerx_demux_handler(0, NULL); return IRQ_HANDLED; } diff --git a/arch/arm/mach-imx/dma.c b/arch/arm/mach-imx/dma.c index 36578871ecc8..6d50d85a618c 100644 --- a/arch/arm/mach-imx/dma.c +++ b/arch/arm/mach-imx/dma.c @@ -279,8 +279,8 @@ imx_dma_setup_sg(imx_dmach_t dma_ch, */ int imx_dma_setup_handlers(imx_dmach_t dma_ch, - void (*irq_handler) (int, void *, struct pt_regs *), - void (*err_handler) (int, void *, struct pt_regs *, int), + void (*irq_handler) (int, void *), + void (*err_handler) (int, void *, int), void *data) { struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch]; @@ -461,7 +461,7 @@ imx_dma_request_by_prio(imx_dmach_t * pdma_ch, const char *name, return -ENODEV; } -static irqreturn_t dma_err_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dma_err_handler(int irq, void *dev_id) { int i, disr = DISR; struct imx_dma_channel *channel; @@ -500,7 +500,7 @@ static irqreturn_t dma_err_handler(int irq, void *dev_id, struct pt_regs *regs) /*imx_dma_channels[i].sg = NULL;*/ if (channel->name && channel->err_handler) { - channel->err_handler(i, channel->data, regs, errcode); + channel->err_handler(i, channel->data, errcode); continue; } @@ -517,7 +517,7 @@ static irqreturn_t dma_err_handler(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dma_irq_handler(int irq, void *dev_id) { int i, disr = DISR; @@ -536,7 +536,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) } else { if (channel->irq_handler) channel->irq_handler(i, - channel->data, regs); + channel->data); } } else { /* diff --git a/arch/arm/mach-imx/irq.c b/arch/arm/mach-imx/irq.c index 2688bd82c2a2..368b13b058ab 100644 --- a/arch/arm/mach-imx/irq.c +++ b/arch/arm/mach-imx/irq.c @@ -146,13 +146,13 @@ imx_gpio_unmask_irq(unsigned int irq) static void imx_gpio_handler(unsigned int mask, unsigned int irq, - struct irqdesc *desc, struct pt_regs *regs) + struct irqdesc *desc) { desc = irq_desc + irq; while (mask) { if (mask & 1) { DEBUG_IRQ("handling irq %d\n", irq); - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); } irq++; desc++; @@ -161,47 +161,43 @@ imx_gpio_handler(unsigned int mask, unsigned int irq, } static void -imx_gpioa_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +imx_gpioa_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = ISR(0); irq = IRQ_GPIOA(0); - imx_gpio_handler(mask, irq, desc, regs); + imx_gpio_handler(mask, irq, desc); } static void -imx_gpiob_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +imx_gpiob_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = ISR(1); irq = IRQ_GPIOB(0); - imx_gpio_handler(mask, irq, desc, regs); + imx_gpio_handler(mask, irq, desc); } static void -imx_gpioc_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +imx_gpioc_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = ISR(2); irq = IRQ_GPIOC(0); - imx_gpio_handler(mask, irq, desc, regs); + imx_gpio_handler(mask, irq, desc); } static void -imx_gpiod_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +imx_gpiod_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int mask, irq; mask = ISR(3); irq = IRQ_GPIOD(0); - imx_gpio_handler(mask, irq, desc, regs); + imx_gpio_handler(mask, irq, desc); } static struct irq_chip imx_internal_chip = { diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index 6ed7523c65bb..8ae4a2c5066f 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -56,7 +56,7 @@ static unsigned long imx_gettimeoffset(void) * IRQ handler for the timer */ static irqreturn_t -imx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +imx_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); @@ -64,7 +64,7 @@ imx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) if (IMX_TSTAT(TIMER_BASE)) IMX_TSTAT(TIMER_BASE) = 0; - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 42021fdfa0c6..8d880cb9ba39 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -248,7 +248,7 @@ unsigned long integrator_gettimeoffset(void) * IRQ handler for the timer */ static irqreturn_t -integrator_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +integrator_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); @@ -262,7 +262,7 @@ integrator_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * primary CPU */ if (hard_smp_processor_id() == 0) { - timer_tick(regs); + timer_tick(); #ifdef CONFIG_SMP smp_send_timer(); #endif @@ -272,7 +272,7 @@ integrator_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) /* * this is the ARM equivalent of the APIC timer interrupt */ - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif /* CONFIG_SMP */ write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 678b6ba2b463..771b65bffe69 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -202,12 +202,12 @@ static struct irq_chip sic_chip = { }; static void -sic_handle_irq(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +sic_handle_irq(unsigned int irq, struct irqdesc *desc) { unsigned long status = sic_readl(INTCP_VA_SIC_BASE + IRQ_STATUS); if (status == 0) { - do_bad_IRQ(irq, desc, regs); + do_bad_IRQ(irq, desc); return; } @@ -218,7 +218,7 @@ sic_handle_irq(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) irq += IRQ_SIC_START; desc = irq_desc + irq; - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); } while (status); } diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index 4418f6d7572d..fb8c6d97b22b 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -440,9 +440,10 @@ v3_pci_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) return 1; } -static irqreturn_t v3_irq(int irq, void *devid, struct pt_regs *regs) +static irqreturn_t v3_irq(int irq, void *devid) { #ifdef CONFIG_DEBUG_LL + struct pt_regs *regs = get_irq_regs(); unsigned long pc = instruction_pointer(regs); unsigned long instr = *(unsigned long *)pc; char buf[128]; diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c index ee49cf790dab..5278f589fcee 100644 --- a/arch/arm/mach-integrator/time.c +++ b/arch/arm/mach-integrator/time.c @@ -96,8 +96,7 @@ static struct rtc_ops rtc_ops = { .set_alarm = integrator_rtc_set_alarm, }; -static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id) { writel(0, rtc_base + RTC_EOI); return IRQ_HANDLED; diff --git a/arch/arm/mach-ixp2000/core.c b/arch/arm/mach-ixp2000/core.c index 7f91f689a041..22c98e9dad28 100644 --- a/arch/arm/mach-ixp2000/core.c +++ b/arch/arm/mach-ixp2000/core.c @@ -204,7 +204,7 @@ unsigned long ixp2000_gettimeoffset (void) return offset / ticks_per_usec; } -static int ixp2000_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static int ixp2000_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); @@ -213,7 +213,7 @@ static int ixp2000_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) while ((signed long)(next_jiffy_time - *missing_jiffy_timer_csr) >= ticks_per_jiffy) { - timer_tick(regs); + timer_tick(); next_jiffy_time -= ticks_per_jiffy; } @@ -308,7 +308,7 @@ EXPORT_SYMBOL(gpio_line_config); /************************************************************************* * IRQ handling IXP2000 *************************************************************************/ -static void ixp2000_GPIO_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void ixp2000_GPIO_irq_handler(unsigned int irq, struct irqdesc *desc) { int i; unsigned long status = *IXP2000_GPIO_INST; @@ -316,7 +316,7 @@ static void ixp2000_GPIO_irq_handler(unsigned int irq, struct irqdesc *desc, str for (i = 0; i <= 7; i++) { if (status & (1<<i)) { desc = irq_desc + i + IRQ_IXP2000_GPIO0; - desc_handle_irq(i + IRQ_IXP2000_GPIO0, desc, regs); + desc_handle_irq(i + IRQ_IXP2000_GPIO0, desc); } } } @@ -401,7 +401,7 @@ static void ixp2000_pci_irq_unmask(unsigned int irq) /* * Error interrupts. These are used extensively by the microengine drivers */ -static void ixp2000_err_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void ixp2000_err_irq_handler(unsigned int irq, struct irqdesc *desc) { int i; unsigned long status = *IXP2000_IRQ_ERR_STATUS; @@ -409,7 +409,7 @@ static void ixp2000_err_irq_handler(unsigned int irq, struct irqdesc *desc, str for(i = 31; i >= 0; i--) { if(status & (1 << i)) { desc = irq_desc + IRQ_IXP2000_DRAM0_MIN_ERR + i; - desc_handle_irq(IRQ_IXP2000_DRAM0_MIN_ERR + i, desc, regs); + desc_handle_irq(IRQ_IXP2000_DRAM0_MIN_ERR + i, desc); } } } diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c index 40eef8b36740..af48cb52dfc4 100644 --- a/arch/arm/mach-ixp2000/ixdp2x00.c +++ b/arch/arm/mach-ixp2000/ixdp2x00.c @@ -106,7 +106,7 @@ static void ixdp2x00_irq_unmask(unsigned int irq) ixp2000_release_slowport(&old_cfg); } -static void ixdp2x00_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void ixdp2x00_irq_handler(unsigned int irq, struct irqdesc *desc) { volatile u32 ex_interrupt = 0; static struct slowport_cfg old_cfg; @@ -132,7 +132,7 @@ static void ixdp2x00_irq_handler(unsigned int irq, struct irqdesc *desc, struct struct irqdesc *cpld_desc; int cpld_irq = IXP2000_BOARD_IRQ(0) + i; cpld_desc = irq_desc + cpld_irq; - desc_handle_irq(cpld_irq, cpld_desc, regs); + desc_handle_irq(cpld_irq, cpld_desc); } } diff --git a/arch/arm/mach-ixp2000/ixdp2x01.c b/arch/arm/mach-ixp2000/ixdp2x01.c index 7f42366f60d1..9ccae9e63f70 100644 --- a/arch/arm/mach-ixp2000/ixdp2x01.c +++ b/arch/arm/mach-ixp2000/ixdp2x01.c @@ -63,7 +63,7 @@ static void ixdp2x01_irq_unmask(unsigned int irq) static u32 valid_irq_mask; -static void ixdp2x01_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void ixdp2x01_irq_handler(unsigned int irq, struct irqdesc *desc) { u32 ex_interrupt; int i; @@ -82,7 +82,7 @@ static void ixdp2x01_irq_handler(unsigned int irq, struct irqdesc *desc, struct struct irqdesc *cpld_desc; int cpld_irq = IXP2000_BOARD_IRQ(0) + i; cpld_desc = irq_desc + cpld_irq; - desc_handle_irq(cpld_irq, cpld_desc, regs); + desc_handle_irq(cpld_irq, cpld_desc); } } diff --git a/arch/arm/mach-ixp23xx/core.c b/arch/arm/mach-ixp23xx/core.c index 566a07821c77..a704a1820048 100644 --- a/arch/arm/mach-ixp23xx/core.c +++ b/arch/arm/mach-ixp23xx/core.c @@ -251,7 +251,7 @@ static void ixp23xx_pci_irq_unmask(unsigned int irq) /* * TODO: Should this just be done at ASM level? */ -static void pci_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void pci_handler(unsigned int irq, struct irqdesc *desc) { u32 pci_interrupt; unsigned int irqno; @@ -271,7 +271,7 @@ static void pci_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs * } int_desc = irq_desc + irqno; - desc_handle_irq(irqno, int_desc, regs); + desc_handle_irq(irqno, int_desc); desc->chip->unmask(irq); } @@ -348,12 +348,12 @@ ixp23xx_gettimeoffset(void) } static irqreturn_t -ixp23xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +ixp23xx_timer_interrupt(int irq, void *dev_id) { /* Clear Pending Interrupt by writing '1' to it */ *IXP23XX_TIMER_STATUS = IXP23XX_TIMER1_INT_PEND; while ((signed long)(*IXP23XX_TIMER_CONT - next_jiffy_time) >= LATCH) { - timer_tick(regs); + timer_tick(); next_jiffy_time += LATCH; } diff --git a/arch/arm/mach-ixp23xx/ixdp2351.c b/arch/arm/mach-ixp23xx/ixdp2351.c index 37a32e6bcca2..b6ab0e8bb5e8 100644 --- a/arch/arm/mach-ixp23xx/ixdp2351.c +++ b/arch/arm/mach-ixp23xx/ixdp2351.c @@ -60,7 +60,7 @@ static void ixdp2351_inta_unmask(unsigned int irq) *IXDP2351_CPLD_INTA_MASK_CLR_REG = IXDP2351_INTA_IRQ_MASK(irq); } -static void ixdp2351_inta_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void ixdp2351_inta_handler(unsigned int irq, struct irqdesc *desc) { u16 ex_interrupt = *IXDP2351_CPLD_INTA_STAT_REG & IXDP2351_INTA_IRQ_VALID; @@ -74,7 +74,7 @@ static void ixdp2351_inta_handler(unsigned int irq, struct irqdesc *desc, struct int cpld_irq = IXP23XX_MACH_IRQ(IXDP2351_INTA_IRQ_BASE + i); cpld_desc = irq_desc + cpld_irq; - desc_handle_irq(cpld_irq, cpld_desc, regs); + desc_handle_irq(cpld_irq, cpld_desc); } } @@ -97,7 +97,7 @@ static void ixdp2351_intb_unmask(unsigned int irq) *IXDP2351_CPLD_INTB_MASK_CLR_REG = IXDP2351_INTB_IRQ_MASK(irq); } -static void ixdp2351_intb_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void ixdp2351_intb_handler(unsigned int irq, struct irqdesc *desc) { u16 ex_interrupt = *IXDP2351_CPLD_INTB_STAT_REG & IXDP2351_INTB_IRQ_VALID; @@ -111,7 +111,7 @@ static void ixdp2351_intb_handler(unsigned int irq, struct irqdesc *desc, struct int cpld_irq = IXP23XX_MACH_IRQ(IXDP2351_INTB_IRQ_BASE + i); cpld_desc = irq_desc + cpld_irq; - desc_handle_irq(cpld_irq, cpld_desc, regs); + desc_handle_irq(cpld_irq, cpld_desc); } } diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 35dd8b3824b0..c7513f6eb50c 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -256,7 +256,7 @@ static unsigned volatile last_jiffy_time; #define CLOCK_TICKS_PER_USEC ((CLOCK_TICK_RATE + USEC_PER_SEC/2) / USEC_PER_SEC) -static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); @@ -267,7 +267,7 @@ static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id, struct pt_regs * Catch up with the real idea of time */ while ((signed long)(*IXP4XX_OSTS - last_jiffy_time) >= LATCH) { - timer_tick(regs); + timer_tick(); last_jiffy_time += LATCH; } diff --git a/arch/arm/mach-ixp4xx/nas100d-power.c b/arch/arm/mach-ixp4xx/nas100d-power.c index 81ffcae1f56e..29aa98d3a7fa 100644 --- a/arch/arm/mach-ixp4xx/nas100d-power.c +++ b/arch/arm/mach-ixp4xx/nas100d-power.c @@ -24,7 +24,7 @@ #include <asm/mach-types.h> -static irqreturn_t nas100d_reset_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t nas100d_reset_handler(int irq, void *dev_id) { /* Signal init to do the ctrlaltdel action, this will bypass init if * it hasn't started and do a kernel_restart. diff --git a/arch/arm/mach-ixp4xx/nslu2-power.c b/arch/arm/mach-ixp4xx/nslu2-power.c index a29b3b2b61b6..acd71e9c38a7 100644 --- a/arch/arm/mach-ixp4xx/nslu2-power.c +++ b/arch/arm/mach-ixp4xx/nslu2-power.c @@ -25,7 +25,7 @@ #include <asm/mach-types.h> -static irqreturn_t nslu2_power_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t nslu2_power_handler(int irq, void *dev_id) { /* Signal init to do the ctrlaltdel action, this will bypass init if * it hasn't started and do a kernel_restart. @@ -35,7 +35,7 @@ static irqreturn_t nslu2_power_handler(int irq, void *dev_id, struct pt_regs *re return IRQ_HANDLED; } -static irqreturn_t nslu2_reset_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t nslu2_reset_handler(int irq, void *dev_id) { /* This is the paper-clip reset, it shuts the machine down directly. */ diff --git a/arch/arm/mach-lh7a40x/arch-kev7a400.c b/arch/arm/mach-lh7a40x/arch-kev7a400.c index 4f2ab48800a5..15fbcc911fe7 100644 --- a/arch/arm/mach-lh7a40x/arch-kev7a400.c +++ b/arch/arm/mach-lh7a40x/arch-kev7a400.c @@ -71,14 +71,13 @@ static struct irq_chip kev7a400_cpld_chip = { }; -static void kev7a400_cpld_handler (unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void kev7a400_cpld_handler (unsigned int irq, struct irqdesc *desc) { u32 mask = CPLD_LATCHED_INTS; irq = IRQ_KEV7A400_CPLD; for (; mask; mask >>= 1, ++irq) { if (mask & 1) - desc[irq].handle (irq, desc, regs); + desc[irq].handle (irq, desc); } } diff --git a/arch/arm/mach-lh7a40x/arch-lpd7a40x.c b/arch/arm/mach-lh7a40x/arch-lpd7a40x.c index a21b12f06c6b..8441e0a156cb 100644 --- a/arch/arm/mach-lh7a40x/arch-lpd7a40x.c +++ b/arch/arm/mach-lh7a40x/arch-lpd7a40x.c @@ -207,8 +207,7 @@ static struct irq_chip lpd7a40x_cpld_chip = { .unmask = lh7a40x_unmask_cpld_irq, }; -static void lpd7a40x_cpld_handler (unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void lpd7a40x_cpld_handler (unsigned int irq, struct irqdesc *desc) { unsigned int mask = CPLD_INTERRUPTS; diff --git a/arch/arm/mach-lh7a40x/irq-kev7a400.c b/arch/arm/mach-lh7a40x/irq-kev7a400.c index f9b3fe9174a5..646071334b8f 100644 --- a/arch/arm/mach-lh7a40x/irq-kev7a400.c +++ b/arch/arm/mach-lh7a40x/irq-kev7a400.c @@ -51,14 +51,13 @@ irq_chip lh7a400_cpld_chip = { }; static void -lh7a400_cpld_handler (unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +lh7a400_cpld_handler (unsigned int irq, struct irqdesc *desc) { u32 mask = CPLD_LATCHED_INTS; irq = IRQ_KEV_7A400_CPLD; for (; mask; mask >>= 1, ++irq) { if (mask & 1) - desc[irq].handle (irq, desc, regs); + desc[irq].handle (irq, desc); } } diff --git a/arch/arm/mach-lh7a40x/irq-lpd7a40x.c b/arch/arm/mach-lh7a40x/irq-lpd7a40x.c index d6055dde6468..b20376804bbb 100644 --- a/arch/arm/mach-lh7a40x/irq-lpd7a40x.c +++ b/arch/arm/mach-lh7a40x/irq-lpd7a40x.c @@ -57,8 +57,7 @@ static struct irq_chip lh7a40x_cpld_chip = { .unmask = lh7a40x_unmask_cpld_irq, }; -static void lh7a40x_cpld_handler (unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void lh7a40x_cpld_handler (unsigned int irq, struct irqdesc *desc) { unsigned int mask = CPLD_INTERRUPTS; diff --git a/arch/arm/mach-lh7a40x/time.c b/arch/arm/mach-lh7a40x/time.c index ad5652e01507..bef3c4b68d3b 100644 --- a/arch/arm/mach-lh7a40x/time.c +++ b/arch/arm/mach-lh7a40x/time.c @@ -39,12 +39,12 @@ #endif static irqreturn_t -lh7a40x_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +lh7a40x_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); TIMER_EOI = 0; - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c index af0b13534cfd..edbbbdc3b06b 100644 --- a/arch/arm/mach-netx/generic.c +++ b/arch/arm/mach-netx/generic.c @@ -69,8 +69,7 @@ static struct platform_device *devices[] __initdata = { #endif static void -netx_hif_demux_handler(unsigned int irq_unused, struct irqdesc *desc, - struct pt_regs *regs) +netx_hif_demux_handler(unsigned int irq_unused, struct irqdesc *desc) { unsigned int irq = NETX_IRQ_HIF_CHAINED(0); unsigned int stat; @@ -83,7 +82,7 @@ netx_hif_demux_handler(unsigned int irq_unused, struct irqdesc *desc, while (stat) { if (stat & 1) { DEBUG_IRQ("handling irq %d\n", irq); - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); } irq++; desc++; diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c index 6d72c81b7d9f..0993336c0b55 100644 --- a/arch/arm/mach-netx/time.c +++ b/arch/arm/mach-netx/time.c @@ -38,11 +38,11 @@ static unsigned long netx_gettimeoffset(void) * IRQ handler for the timer */ static irqreturn_t -netx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +netx_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); /* acknowledge interrupt */ diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 6b05647a6c01..3a622801d7b0 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -327,7 +327,7 @@ static struct spi_board_info __initdata mistral_boardinfo[] = { { #ifdef CONFIG_PM static irqreturn_t -osk_mistral_wake_interrupt(int irq, void *ignored, struct pt_regs *regs) +osk_mistral_wake_interrupt(int irq, void *ignored) { return IRQ_HANDLED; } diff --git a/arch/arm/mach-omap1/fpga.c b/arch/arm/mach-omap1/fpga.c index efe9bfc6e55f..8e40208b10bb 100644 --- a/arch/arm/mach-omap1/fpga.c +++ b/arch/arm/mach-omap1/fpga.c @@ -84,8 +84,7 @@ static void fpga_mask_ack_irq(unsigned int irq) fpga_ack_irq(irq); } -void innovator_fpga_IRQ_demux(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +void innovator_fpga_IRQ_demux(unsigned int irq, struct irqdesc *desc) { struct irqdesc *d; u32 stat; @@ -101,7 +100,7 @@ void innovator_fpga_IRQ_demux(unsigned int irq, struct irqdesc *desc, fpga_irq++, stat >>= 1) { if (stat & 1) { d = irq_desc + fpga_irq; - desc_handle_irq(fpga_irq, d, regs); + desc_handle_irq(fpga_irq, d); } } } diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index cd76185bab74..4834758d340c 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -682,8 +682,7 @@ static int omap_pm_finish(suspend_state_t state) } -static irqreturn_t omap_wakeup_interrupt(int irq, void * dev, - struct pt_regs * regs) +static irqreturn_t omap_wakeup_interrupt(int irq, void *dev) { return IRQ_HANDLED; } diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index c4b790217a5b..4cc98a578e4b 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -204,8 +204,7 @@ void __init omap_serial_init(void) #ifdef CONFIG_OMAP_SERIAL_WAKE -static irqreturn_t omap_serial_wake_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap_serial_wake_interrupt(int irq, void *dev_id) { /* Need to do something with serial port right after wake-up? */ return IRQ_HANDLED; diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 4d91b9f51084..1b7e4a506c26 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -160,8 +160,7 @@ static unsigned long omap_mpu_timer_gettimeoffset(void) * Latency during the interrupt is calculated using timer1. * Both timer0 and timer1 are counting at 6MHz (P2 6.5MHz). */ -static irqreturn_t omap_mpu_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap_mpu_timer_interrupt(int irq, void *dev_id) { unsigned long now, latency; @@ -169,7 +168,7 @@ static irqreturn_t omap_mpu_timer_interrupt(int irq, void *dev_id, now = 0 - omap_mpu_timer_read(0); latency = MPU_TICKS_PER_SEC / HZ - omap_mpu_timer_read(1); omap_mpu_timer_last = now - latency; - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; @@ -182,8 +181,7 @@ static struct irqaction omap_mpu_timer_irq = { }; static unsigned long omap_mpu_timer1_overflows; -static irqreturn_t omap_mpu_timer1_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap_mpu_timer1_interrupt(int irq, void *dev_id) { omap_mpu_timer1_overflows++; return IRQ_HANDLED; diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c index c37b0e6d1248..03d6905ba490 100644 --- a/arch/arm/mach-omap2/board-apollon.c +++ b/arch/arm/mach-omap2/board-apollon.c @@ -203,7 +203,7 @@ static void __init apollon_led_init(void) omap_set_gpio_dataout(LED2_GPIO15, 0); } -static irqreturn_t apollon_sw_interrupt(int irq, void *ignored, struct pt_regs *regs) +static irqreturn_t apollon_sw_interrupt(int irq, void *ignored) { static unsigned int led0, led1, led2; diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index fe5fd6d42dea..973189cd9766 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -37,13 +37,12 @@ static inline void omap2_gp_timer_start(unsigned long load_val) omap_dm_timer_start(gptimer); } -static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); omap_dm_timer_write_status(gptimer, OMAP_TIMER_INT_OVERFLOW); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-pnx4008/dma.c b/arch/arm/mach-pnx4008/dma.c index ec01574f88ac..d6a279e4b524 100644 --- a/arch/arm/mach-pnx4008/dma.c +++ b/arch/arm/mach-pnx4008/dma.c @@ -32,7 +32,7 @@ static struct dma_channel { char *name; - void (*irq_handler) (int, int, void *, struct pt_regs *); + void (*irq_handler) (int, int, void *); void *data; struct pnx4008_dma_ll *ll; u32 ll_dma; @@ -150,8 +150,7 @@ static inline void pnx4008_dma_unlock(void) #define VALID_CHANNEL(c) (((c) >= 0) && ((c) < MAX_DMA_CHANNELS)) int pnx4008_request_channel(char *name, int ch, - void (*irq_handler) (int, int, void *, - struct pt_regs *), void *data) + void (*irq_handler) (int, int, void *), void *data) { int i, found = 0; @@ -1033,7 +1032,7 @@ int pnx4008_dma_ch_enabled(int ch) EXPORT_SYMBOL_GPL(pnx4008_dma_ch_enabled); -static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dma_irq_handler(int irq, void *dev_id) { int i; unsigned long dint = __raw_readl(DMAC_INT_STAT); @@ -1053,8 +1052,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) cause |= DMA_ERR_INT; if (tcint & i_bit) cause |= DMA_TC_INT; - channel->irq_handler(i, cause, channel->data, - regs); + channel->irq_handler(i, cause, channel->data); } else { /* * IRQ for an unregistered DMA channel diff --git a/arch/arm/mach-pnx4008/time.c b/arch/arm/mach-pnx4008/time.c index b986065cd0f3..8621c206ac84 100644 --- a/arch/arm/mach-pnx4008/time.c +++ b/arch/arm/mach-pnx4008/time.c @@ -47,15 +47,14 @@ static unsigned long pnx4008_gettimeoffset(void) /*! * IRQ handler for the timer */ -static irqreturn_t pnx4008_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t pnx4008_timer_interrupt(int irq, void *dev_id) { if (__raw_readl(HSTIM_INT) & MATCH0_INT) { write_seqlock(&xtime_lock); do { - timer_tick(regs); + timer_tick(); /* * this algorithm takes care of possible delay diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 337c01c4ac37..a1a900d16665 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -212,7 +212,7 @@ static struct platform_device corgits_device = { */ static struct pxamci_platform_data corgi_mci_platform_data; -static int corgi_mci_init(struct device *dev, irqreturn_t (*corgi_detect_int)(int, void *, struct pt_regs *), void *data) +static int corgi_mci_init(struct device *dev, irq_handler_t corgi_detect_int, void *data) { int err; diff --git a/arch/arm/mach-pxa/dma.c b/arch/arm/mach-pxa/dma.c index 7d8c85486c66..4440babe7b97 100644 --- a/arch/arm/mach-pxa/dma.c +++ b/arch/arm/mach-pxa/dma.c @@ -27,13 +27,13 @@ static struct dma_channel { char *name; - void (*irq_handler)(int, void *, struct pt_regs *); + void (*irq_handler)(int, void *); void *data; } dma_channels[PXA_DMA_CHANNELS]; int pxa_request_dma (char *name, pxa_dma_prio prio, - void (*irq_handler)(int, void *, struct pt_regs *), + void (*irq_handler)(int, void *), void *data) { unsigned long flags; @@ -87,7 +87,7 @@ void pxa_free_dma (int dma_ch) local_irq_restore(flags); } -static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dma_irq_handler(int irq, void *dev_id) { int i, dint = DINT; @@ -95,7 +95,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) if (dint & (1 << i)) { struct dma_channel *channel = &dma_channels[i]; if (channel->name && channel->irq_handler) { - channel->irq_handler(i, channel->data, regs); + channel->irq_handler(i, channel->data); } else { /* * IRQ for an unregistered DMA channel: diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index 3e4b0ab71c66..64df44043a65 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -125,7 +125,7 @@ static struct pxafb_mach_info sharp_lm8v31 = { .pxafb_lcd_power = &idp_lcd_power }; -static int idp_mci_init(struct device *dev, irqreturn_t (*idp_detect_int)(int, void *, struct pt_regs *), void *data) +static int idp_mci_init(struct device *dev, irq_handler_t idp_detect_int, void *data) { /* setup GPIO for PXA25x MMC controller */ pxa_gpio_mode(GPIO6_MMCCLK_MD); diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 12141e2a50cc..ab1a16025d51 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -143,8 +143,7 @@ static struct irq_chip pxa_low_gpio_chip = { * Demux handler for GPIO>=2 edge detect interrupts */ -static void pxa_gpio_demux_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void pxa_gpio_demux_handler(unsigned int irq, struct irqdesc *desc) { unsigned int mask; int loop; @@ -160,7 +159,7 @@ static void pxa_gpio_demux_handler(unsigned int irq, struct irqdesc *desc, mask >>= 2; do { if (mask & 1) - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); irq++; desc++; mask >>= 1; @@ -175,7 +174,7 @@ static void pxa_gpio_demux_handler(unsigned int irq, struct irqdesc *desc, desc = irq_desc + irq; do { if (mask & 1) - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); irq++; desc++; mask >>= 1; @@ -190,7 +189,7 @@ static void pxa_gpio_demux_handler(unsigned int irq, struct irqdesc *desc, desc = irq_desc + irq; do { if (mask & 1) - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); irq++; desc++; mask >>= 1; @@ -206,7 +205,7 @@ static void pxa_gpio_demux_handler(unsigned int irq, struct irqdesc *desc, desc = irq_desc + irq; do { if (mask & 1) - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); irq++; desc++; mask >>= 1; diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index eff2a91b2565..5749f6b72e12 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -75,8 +75,7 @@ static struct irq_chip lpd270_irq_chip = { .unmask = lpd270_unmask_irq, }; -static void lpd270_irq_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void lpd270_irq_handler(unsigned int irq, struct irqdesc *desc) { unsigned long pending; @@ -86,7 +85,7 @@ static void lpd270_irq_handler(unsigned int irq, struct irqdesc *desc, if (likely(pending)) { irq = LPD270_IRQ(0) + __ffs(pending); desc = irq_desc + irq; - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); pending = __raw_readw(LPD270_INT_STATUS) & lpd270_irq_enabled; diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 157cf47cbe66..ee80d62119d3 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -85,8 +85,7 @@ static struct irq_chip lubbock_irq_chip = { .unmask = lubbock_unmask_irq, }; -static void lubbock_irq_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void lubbock_irq_handler(unsigned int irq, struct irqdesc *desc) { unsigned long pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled; do { @@ -94,7 +93,7 @@ static void lubbock_irq_handler(unsigned int irq, struct irqdesc *desc, if (likely(pending)) { irq = LUBBOCK_IRQ(0) + __ffs(pending); desc = irq_desc + irq; - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); } pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled; } while (pending); @@ -379,7 +378,7 @@ static struct pxafb_mach_info sharp_lm8v31 = { #define MMC_POLL_RATE msecs_to_jiffies(1000) static void lubbock_mmc_poll(unsigned long); -static irqreturn_t (*mmc_detect_int)(int, void *, struct pt_regs *); +static irq_handler_t mmc_detect_int; static struct timer_list mmc_timer = { .function = lubbock_mmc_poll, @@ -403,17 +402,17 @@ static void lubbock_mmc_poll(unsigned long data) } } -static irqreturn_t lubbock_detect_int(int irq, void *data, struct pt_regs *regs) +static irqreturn_t lubbock_detect_int(int irq, void *data) { /* IRQ is level triggered; disable, and poll for removal */ disable_irq(irq); mod_timer(&mmc_timer, jiffies + MMC_POLL_RATE); - return mmc_detect_int(irq, data, regs); + return mmc_detect_int(irq, data); } static int lubbock_mci_init(struct device *dev, - irqreturn_t (*detect_int)(int, void *, struct pt_regs *), + irq_handler_t detect_int, void *data) { /* setup GPIO for PXA25x MMC controller */ diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 7ba0447d6fa3..49c34d94a9fe 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -71,8 +71,7 @@ static struct irq_chip mainstone_irq_chip = { .unmask = mainstone_unmask_irq, }; -static void mainstone_irq_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void mainstone_irq_handler(unsigned int irq, struct irqdesc *desc) { unsigned long pending = MST_INTSETCLR & mainstone_irq_enabled; do { @@ -80,7 +79,7 @@ static void mainstone_irq_handler(unsigned int irq, struct irqdesc *desc, if (likely(pending)) { irq = MAINSTONE_IRQ(0) + __ffs(pending); desc = irq_desc + irq; - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); } pending = MST_INTSETCLR & mainstone_irq_enabled; } while (pending); @@ -314,7 +313,7 @@ static struct pxafb_mach_info mainstone_pxafb_info = { .pxafb_backlight_power = mainstone_backlight_power, }; -static int mainstone_mci_init(struct device *dev, irqreturn_t (*mstone_detect_int)(int, void *, struct pt_regs *), void *data) +static int mainstone_mci_init(struct device *dev, irq_handler_t mstone_detect_int, void *data) { int err; diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 5e8c098ca139..34fb80b37023 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -197,7 +197,7 @@ static struct platform_device poodle_ts_device = { */ static struct pxamci_platform_data poodle_mci_platform_data; -static int poodle_mci_init(struct device *dev, irqreturn_t (*poodle_detect_int)(int, void *, struct pt_regs *), void *data) +static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int, void *data) { int err; diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 401cdb850fbc..3cbac63bed3c 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -291,7 +291,7 @@ static struct platform_device spitzts_device = { static struct pxamci_platform_data spitz_mci_platform_data; -static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(int, void *, struct pt_regs *), void *data) +static int spitz_mci_init(struct device *dev, irq_handler_t spitz_detect_int, void *data) { int err; diff --git a/arch/arm/mach-pxa/ssp.c b/arch/arm/mach-pxa/ssp.c index 1fddfeaa630d..6cc202755fb4 100644 --- a/arch/arm/mach-pxa/ssp.c +++ b/arch/arm/mach-pxa/ssp.c @@ -65,7 +65,7 @@ static const struct ssp_info_ ssp_info[PXA_SSP_PORTS] = { static DEFINE_MUTEX(mutex); static int use_count[PXA_SSP_PORTS] = {0, 0, 0}; -static irqreturn_t ssp_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ssp_interrupt(int irq, void *dev_id) { struct ssp_dev *dev = (struct ssp_dev*) dev_id; unsigned int status = SSSR_P(dev->port); diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 5dbd191c57c4..3ac268fa419b 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -75,7 +75,7 @@ static int match_posponed; #endif static irqreturn_t -pxa_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +pxa_timer_interrupt(int irq, void *dev_id) { int next_match; @@ -105,7 +105,7 @@ pxa_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * exactly one tick period which should be a pretty rare event. */ do { - timer_tick(regs); + timer_tick(); OSSR = OSSR_M0; /* Clear match on timer 0 */ next_match = (OSMR0 += LATCH); } while( (signed long)(next_match - OSCR) <= 8 ); @@ -157,13 +157,13 @@ static void pxa_dyn_tick_reprogram(unsigned long ticks) } static irqreturn_t -pxa_dyn_tick_handler(int irq, void *dev_id, struct pt_regs *regs) +pxa_dyn_tick_handler(int irq, void *dev_id) { if (match_posponed) { match_posponed = 0; OSMR0 = initial_match; if ( (signed long)(initial_match - OSCR) <= 8 ) - return pxa_timer_interrupt(irq, dev_id, regs); + return pxa_timer_interrupt(irq, dev_id); } return IRQ_NONE; } diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 249353616aba..7915a5a22865 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -174,7 +174,7 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = { */ static struct pxamci_platform_data tosa_mci_platform_data; -static int tosa_mci_init(struct device *dev, irqreturn_t (*tosa_detect_int)(int, void *, struct pt_regs *), void *data) +static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void *data) { int err; diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index 910571e9a190..c1827d021ba8 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -270,7 +270,7 @@ void board_pcmcia_power(int power) {;} #endif /* CONFIG_MACH_TRIZEPS4_CONXS */ EXPORT_SYMBOL(board_pcmcia_power); -static int trizeps4_mci_init(struct device *dev, irqreturn_t (*mci_detect_int)(int, void *, struct pt_regs *), void *data) +static int trizeps4_mci_init(struct device *dev, irq_handler_t mci_detect_int, void *data) { int err; /* setup GPIO for PXA27x MMC controller */ diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index da0286973823..68c67053f479 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -515,18 +515,18 @@ static unsigned long realview_gettimeoffset(void) /* * IRQ handler for the timer */ -static irqreturn_t realview_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t realview_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); // ...clear the interrupt writel(1, TIMER0_VA_BASE + TIMER_INTCLR); - timer_tick(regs); + timer_tick(); #if defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS) smp_send_timer(); - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c index ac511d41d4d7..596379a4cf82 100644 --- a/arch/arm/mach-rpc/dma.c +++ b/arch/arm/mach-rpc/dma.c @@ -83,7 +83,7 @@ static void iomd_get_next_sg(struct scatterlist *sg, dma_t *dma) sg->length |= flags; } -static irqreturn_t iomd_dma_handle(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t iomd_dma_handle(int irq, void *dev_id) { dma_t *dma = (dma_t *)dev_id; unsigned long base = dma->dma_base; diff --git a/arch/arm/mach-s3c2410/bast-irq.c b/arch/arm/mach-s3c2410/bast-irq.c index 440e9aa0211a..23d5beea5568 100644 --- a/arch/arm/mach-s3c2410/bast-irq.c +++ b/arch/arm/mach-s3c2410/bast-irq.c @@ -112,8 +112,7 @@ static struct irqchip bast_pc104_chip = { static void bast_irq_pc104_demux(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { unsigned int stat; unsigned int irqno; @@ -133,7 +132,7 @@ bast_irq_pc104_demux(unsigned int irq, if (stat & 1) { irqno = bast_pc104_irqs[i]; desc = irq_desc + irqno; - desc_handle_irq(irqno, desc, regs); + desc_handle_irq(irqno, desc); } } } diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c index d264bbbd8bef..3d211dc2f2f9 100644 --- a/arch/arm/mach-s3c2410/dma.c +++ b/arch/arm/mach-s3c2410/dma.c @@ -595,7 +595,7 @@ s3c2410_dma_lastxfer(struct s3c2410_dma_chan *chan) #define dmadbg2(x...) static irqreturn_t -s3c2410_dma_irq(int irq, void *devpw, struct pt_regs *regs) +s3c2410_dma_irq(int irq, void *devpw) { struct s3c2410_dma_chan *chan = (struct s3c2410_dma_chan *)devpw; struct s3c2410_dma_buf *buf; diff --git a/arch/arm/mach-s3c2410/irq.c b/arch/arm/mach-s3c2410/irq.c index 3e9f3462c61b..683b3491ba3c 100644 --- a/arch/arm/mach-s3c2410/irq.c +++ b/arch/arm/mach-s3c2410/irq.c @@ -480,8 +480,7 @@ static struct irqchip s3c_irq_adc = { /* irq demux for adc */ static void s3c_irq_demux_adc(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { unsigned int subsrc, submsk; unsigned int offset = 9; @@ -500,17 +499,16 @@ static void s3c_irq_demux_adc(unsigned int irq, if (subsrc != 0) { if (subsrc & 1) { mydesc = irq_desc + IRQ_TC; - desc_handle_irq(IRQ_TC, mydesc, regs); + desc_handle_irq(IRQ_TC, mydesc); } if (subsrc & 2) { mydesc = irq_desc + IRQ_ADC; - desc_handle_irq(IRQ_ADC, mydesc, regs); + desc_handle_irq(IRQ_ADC, mydesc); } } } -static void s3c_irq_demux_uart(unsigned int start, - struct pt_regs *regs) +static void s3c_irq_demux_uart(unsigned int start) { unsigned int subsrc, submsk; unsigned int offset = start - IRQ_S3CUART_RX0; @@ -533,17 +531,17 @@ static void s3c_irq_demux_uart(unsigned int start, desc = irq_desc + start; if (subsrc & 1) - desc_handle_irq(start, desc, regs); + desc_handle_irq(start, desc); desc++; if (subsrc & 2) - desc_handle_irq(start+1, desc, regs); + desc_handle_irq(start+1, desc); desc++; if (subsrc & 4) - desc_handle_irq(start+2, desc, regs); + desc_handle_irq(start+2, desc); } } @@ -551,35 +549,31 @@ static void s3c_irq_demux_uart(unsigned int start, static void s3c_irq_demux_uart0(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { irq = irq; - s3c_irq_demux_uart(IRQ_S3CUART_RX0, regs); + s3c_irq_demux_uart(IRQ_S3CUART_RX0); } static void s3c_irq_demux_uart1(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { irq = irq; - s3c_irq_demux_uart(IRQ_S3CUART_RX1, regs); + s3c_irq_demux_uart(IRQ_S3CUART_RX1); } static void s3c_irq_demux_uart2(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { irq = irq; - s3c_irq_demux_uart(IRQ_S3CUART_RX2, regs); + s3c_irq_demux_uart(IRQ_S3CUART_RX2); } static void s3c_irq_demux_extint8(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { unsigned long eintpnd = __raw_readl(S3C24XX_EINTPEND); unsigned long eintmsk = __raw_readl(S3C24XX_EINTMASK); @@ -594,15 +588,14 @@ s3c_irq_demux_extint8(unsigned int irq, eintpnd &= ~(1<<irq); irq += (IRQ_EINT4 - 4); - desc_handle_irq(irq, irq_desc + irq, regs); + desc_handle_irq(irq, irq_desc + irq); } } static void s3c_irq_demux_extint4t7(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { unsigned long eintpnd = __raw_readl(S3C24XX_EINTPEND); unsigned long eintmsk = __raw_readl(S3C24XX_EINTMASK); @@ -618,7 +611,7 @@ s3c_irq_demux_extint4t7(unsigned int irq, irq += (IRQ_EINT4 - 4); - desc_handle_irq(irq, irq_desc + irq, regs); + desc_handle_irq(irq, irq_desc + irq); } } diff --git a/arch/arm/mach-s3c2410/mach-amlm5900.c b/arch/arm/mach-s3c2410/mach-amlm5900.c index ba5109af40b4..817e2c684410 100644 --- a/arch/arm/mach-s3c2410/mach-amlm5900.c +++ b/arch/arm/mach-s3c2410/mach-amlm5900.c @@ -226,7 +226,7 @@ static struct s3c2410fb_mach_info __initdata amlm5900_lcd_info = { #endif static irqreturn_t -amlm5900_wake_interrupt(int irq, void *ignored, struct pt_regs *regs) +amlm5900_wake_interrupt(int irq, void *ignored) { return IRQ_HANDLED; } diff --git a/arch/arm/mach-s3c2410/s3c2440-irq.c b/arch/arm/mach-s3c2410/s3c2440-irq.c index fc08febe2e54..39db0752d53b 100644 --- a/arch/arm/mach-s3c2410/s3c2440-irq.c +++ b/arch/arm/mach-s3c2410/s3c2440-irq.c @@ -42,8 +42,7 @@ /* WDT/AC97 */ static void s3c_irq_demux_wdtac97(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { unsigned int subsrc, submsk; struct irqdesc *mydesc; @@ -61,11 +60,11 @@ static void s3c_irq_demux_wdtac97(unsigned int irq, if (subsrc != 0) { if (subsrc & 1) { mydesc = irq_desc + IRQ_S3C2440_WDT; - desc_handle_irq(IRQ_S3C2440_WDT, mydesc, regs); + desc_handle_irq(IRQ_S3C2440_WDT, mydesc); } if (subsrc & 2) { mydesc = irq_desc + IRQ_S3C2440_AC97; - desc_handle_irq(IRQ_S3C2440_AC97, mydesc, regs); + desc_handle_irq(IRQ_S3C2440_AC97, mydesc); } } } diff --git a/arch/arm/mach-s3c2410/s3c244x-irq.c b/arch/arm/mach-s3c2410/s3c244x-irq.c index ec702f88b299..146f2109dd90 100644 --- a/arch/arm/mach-s3c2410/s3c244x-irq.c +++ b/arch/arm/mach-s3c2410/s3c244x-irq.c @@ -42,8 +42,7 @@ /* camera irq */ static void s3c_irq_demux_cam(unsigned int irq, - struct irqdesc *desc, - struct pt_regs *regs) + struct irqdesc *desc) { unsigned int subsrc, submsk; struct irqdesc *mydesc; @@ -61,11 +60,11 @@ static void s3c_irq_demux_cam(unsigned int irq, if (subsrc != 0) { if (subsrc & 1) { mydesc = irq_desc + IRQ_S3C2440_CAM_C; - desc_handle_irq(IRQ_S3C2440_CAM_C, mydesc, regs); + desc_handle_irq(IRQ_S3C2440_CAM_C, mydesc); } if (subsrc & 2) { mydesc = irq_desc + IRQ_S3C2440_CAM_P; - desc_handle_irq(IRQ_S3C2440_CAM_P, mydesc, regs); + desc_handle_irq(IRQ_S3C2440_CAM_P, mydesc); } } } diff --git a/arch/arm/mach-s3c2410/time.c b/arch/arm/mach-s3c2410/time.c index 00d1cfca9712..9910bf0f2cea 100644 --- a/arch/arm/mach-s3c2410/time.c +++ b/arch/arm/mach-s3c2410/time.c @@ -128,10 +128,10 @@ static unsigned long s3c2410_gettimeoffset (void) * IRQ handler for the timer */ static irqreturn_t -s3c2410_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +s3c2410_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c index c635efa7cd31..22b0e1cdd4bf 100644 --- a/arch/arm/mach-s3c2410/usb-simtec.c +++ b/arch/arm/mach-s3c2410/usb-simtec.c @@ -58,7 +58,7 @@ usb_simtec_powercontrol(int port, int to) } static irqreturn_t -usb_simtec_ocirq(int irq, void *pw, struct pt_regs *regs) +usb_simtec_ocirq(int irq, void *pw) { struct s3c2410_hcd_info *info = (struct s3c2410_hcd_info *)pw; diff --git a/arch/arm/mach-sa1100/dma.c b/arch/arm/mach-sa1100/dma.c index 2ea2a657a034..1fbe053e8b59 100644 --- a/arch/arm/mach-sa1100/dma.c +++ b/arch/arm/mach-sa1100/dma.c @@ -42,7 +42,7 @@ static sa1100_dma_t dma_chan[SA1100_DMA_CHANNELS]; static spinlock_t dma_list_lock; -static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dma_irq_handler(int irq, void *dev_id) { dma_regs_t *dma_regs = dev_id; sa1100_dma_t *dma = dma_chan + (((u_int)dma_regs >> 5) & 7); diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 7364478cec12..fa6dc71bd6ad 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -702,7 +702,7 @@ static u32 gpio_irq_mask[] = { GPIO2_SD_CON_SLT, }; -static void h3800_IRQ_demux(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +static void h3800_IRQ_demux(unsigned int irq, struct irqdesc *desc) { int i; @@ -719,14 +719,14 @@ static void h3800_IRQ_demux(unsigned int irq, struct irqdesc *desc, struct pt_re if (0) printk("%s KPIO 0x%08X\n", __FUNCTION__, irq); for (j = 0; j < H3800_KPIO_IRQ_COUNT; j++) if (irq & kpio_irq_mask[j]) - do_edge_IRQ(H3800_KPIO_IRQ_COUNT + j, irq_desc + H3800_KPIO_IRQ_COUNT + j, regs); + do_edge_IRQ(H3800_KPIO_IRQ_COUNT + j, irq_desc + H3800_KPIO_IRQ_COUNT + j); /* GPIO2 */ irq = H3800_ASIC2_GPIINTFLAG; if (0) printk("%s GPIO 0x%08X\n", __FUNCTION__, irq); for (j = 0; j < H3800_GPIO_IRQ_COUNT; j++) if (irq & gpio_irq_mask[j]) - do_edge_IRQ(H3800_GPIO_IRQ_COUNT + j, irq_desc + H3800_GPIO_IRQ_COUNT + j , regs); + do_edge_IRQ(H3800_GPIO_IRQ_COUNT + j, irq_desc + H3800_GPIO_IRQ_COUNT + j); } if (i >= MAX_ASIC_ISR_LOOPS) diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index b55b90a2e8fe..f4c6322ca33e 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -110,8 +110,7 @@ static struct irq_chip sa1100_low_gpio_chip = { * and call the handler. */ static void -sa1100_high_gpio_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +sa1100_high_gpio_handler(unsigned int irq, struct irqdesc *desc) { unsigned int mask; @@ -128,7 +127,7 @@ sa1100_high_gpio_handler(unsigned int irq, struct irqdesc *desc, mask >>= 11; do { if (mask & 1) - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); mask >>= 1; irq++; desc++; diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index af6d2775cf82..354d5e91da59 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -29,7 +29,7 @@ * is rather unfortunate. */ static void -neponset_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +neponset_irq_handler(unsigned int irq, struct irqdesc *desc) { unsigned int irr; @@ -69,12 +69,12 @@ neponset_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *reg if (irr & IRR_ETHERNET) { d = irq_desc + IRQ_NEPONSET_SMC9196; - desc_handle_irq(IRQ_NEPONSET_SMC9196, d, regs); + desc_handle_irq(IRQ_NEPONSET_SMC9196, d); } if (irr & IRR_USAR) { d = irq_desc + IRQ_NEPONSET_USAR; - desc_handle_irq(IRQ_NEPONSET_USAR, d, regs); + desc_handle_irq(IRQ_NEPONSET_USAR, d); } desc->chip->unmask(irq); @@ -82,7 +82,7 @@ neponset_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *reg if (irr & IRR_SA1111) { d = irq_desc + IRQ_NEPONSET_SA1111; - desc_handle_irq(IRQ_NEPONSET_SA1111, d, regs); + desc_handle_irq(IRQ_NEPONSET_SA1111, d); } } } diff --git a/arch/arm/mach-sa1100/ssp.c b/arch/arm/mach-sa1100/ssp.c index 5eba5fbbb561..59703c6fb29b 100644 --- a/arch/arm/mach-sa1100/ssp.c +++ b/arch/arm/mach-sa1100/ssp.c @@ -25,7 +25,7 @@ #define TIMEOUT 100000 -static irqreturn_t ssp_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ssp_interrupt(int irq, void *dev_id) { unsigned int status = Ser4SSSR; diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index 49ae716e16c2..4284bd6f7a1f 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -77,7 +77,7 @@ static int match_posponed; #endif static irqreturn_t -sa1100_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +sa1100_timer_interrupt(int irq, void *dev_id) { unsigned int next_match; @@ -99,7 +99,7 @@ sa1100_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * handlers. */ do { - timer_tick(regs); + timer_tick(); OSSR = OSSR_M0; /* Clear match on timer 0 */ next_match = (OSMR0 += LATCH); } while ((signed long)(next_match - OSCR) <= 0); @@ -151,13 +151,13 @@ static void sa1100_dyn_tick_reprogram(unsigned long ticks) } static irqreturn_t -sa1100_dyn_tick_handler(int irq, void *dev_id, struct pt_regs *regs) +sa1100_dyn_tick_handler(int irq, void *dev_id) { if (match_posponed) { match_posponed = 0; OSMR0 = initial_match; if ((signed long)(initial_match - OSCR) <= 0) - return sa1100_timer_interrupt(irq, dev_id, regs); + return sa1100_timer_interrupt(irq, dev_id); } return IRQ_NONE; } diff --git a/arch/arm/mach-shark/core.c b/arch/arm/mach-shark/core.c index 1095df34fec0..0e480fae8ec5 100644 --- a/arch/arm/mach-shark/core.c +++ b/arch/arm/mach-shark/core.c @@ -80,10 +80,10 @@ static void __init shark_map_io(void) #define HZ_TIME ((1193180 + HZ/2) / HZ) static irqreturn_t -shark_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +shark_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/arm/mach-shark/irq.c b/arch/arm/mach-shark/irq.c index b227052296cf..297ecf130650 100644 --- a/arch/arm/mach-shark/irq.c +++ b/arch/arm/mach-shark/irq.c @@ -61,7 +61,7 @@ static void shark_enable_8259A_irq(unsigned int irq) static void shark_ack_8259A_irq(unsigned int irq){} -static irqreturn_t bogus_int(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t bogus_int(int irq, void *dev_id) { printk("Got interrupt %i!\n",irq); return IRQ_NONE; diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index f2bbef07b1e4..3b8576111c16 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -77,12 +77,12 @@ static struct irq_chip sic_chip = { }; static void -sic_handle_irq(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) +sic_handle_irq(unsigned int irq, struct irqdesc *desc) { unsigned long status = readl(VA_SIC_BASE + SIC_IRQ_STATUS); if (status == 0) { - do_bad_IRQ(irq, desc, regs); + do_bad_IRQ(irq, desc); return; } @@ -93,7 +93,7 @@ sic_handle_irq(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) irq += IRQ_SIC_START; desc = irq_desc + irq; - desc_handle_irq(irq, desc, regs); + desc_handle_irq(irq, desc); } while (status); } @@ -188,12 +188,12 @@ static struct map_desc versatile_io_desc[] __initdata = { .length = SZ_4K, .type = MT_DEVICE }, { - .virtual = VERSATILE_PCI_VIRT_BASE, + .virtual = (unsigned long)VERSATILE_PCI_VIRT_BASE, .pfn = __phys_to_pfn(VERSATILE_PCI_BASE), .length = VERSATILE_PCI_BASE_SIZE, .type = MT_DEVICE }, { - .virtual = VERSATILE_PCI_CFG_VIRT_BASE, + .virtual = (unsigned long)VERSATILE_PCI_CFG_VIRT_BASE, .pfn = __phys_to_pfn(VERSATILE_PCI_CFG_BASE), .length = VERSATILE_PCI_CFG_BASE_SIZE, .type = MT_DEVICE @@ -851,14 +851,14 @@ static unsigned long versatile_gettimeoffset(void) /* * IRQ handler for the timer */ -static irqreturn_t versatile_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t versatile_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); // ...clear the interrupt writel(1, TIMER0_VA_BASE + TIMER_INTCLR); - timer_tick(regs); + timer_tick(); write_sequnlock(&xtime_lock); diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c index 13bbd08ff841..5cd0b5d9e7eb 100644 --- a/arch/arm/mach-versatile/pci.c +++ b/arch/arm/mach-versatile/pci.c @@ -40,14 +40,15 @@ * Cfg 42000000 - 42FFFFFF PCI config * */ -#define SYS_PCICTL IO_ADDRESS(VERSATILE_SYS_PCICTL) -#define PCI_IMAP0 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) -#define PCI_IMAP1 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) -#define PCI_IMAP2 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) -#define PCI_SMAP0 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) -#define PCI_SMAP1 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) -#define PCI_SMAP2 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) -#define PCI_SELFID IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) +#define __IO_ADDRESS(n) ((void __iomem *)(unsigned long)IO_ADDRESS(n)) +#define SYS_PCICTL __IO_ADDRESS(VERSATILE_SYS_PCICTL) +#define PCI_IMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) +#define PCI_IMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) +#define PCI_IMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) +#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) +#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) +#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SELFID __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) #define DEVICE_ID_OFFSET 0x00 #define CSR_OFFSET 0x04 @@ -76,7 +77,7 @@ static int __init versatile_pci_slot_ignore(char *str) __setup("pci_slot_ignore=", versatile_pci_slot_ignore); -static unsigned long __pci_addr(struct pci_bus *bus, +static void __iomem *__pci_addr(struct pci_bus *bus, unsigned int devfn, int offset) { unsigned int busnr = bus->number; @@ -91,14 +92,14 @@ static unsigned long __pci_addr(struct pci_bus *bus, if (devfn > 255) BUG(); - return (VERSATILE_PCI_CFG_VIRT_BASE | (busnr << 16) | + return VERSATILE_PCI_CFG_VIRT_BASE + ((busnr << 16) | (PCI_SLOT(devfn) << 11) | (PCI_FUNC(devfn) << 8) | offset); } static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - unsigned long addr = __pci_addr(bus, devfn, where); + void __iomem *addr = __pci_addr(bus, devfn, where & ~3); u32 v; int slot = PCI_SLOT(devfn); @@ -121,13 +122,12 @@ static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int wh break; case 2: - v = __raw_readl(addr & ~3); - if (addr & 2) v >>= 16; + v = __raw_readl(addr); + if (where & 2) v >>= 16; v &= 0xffff; break; default: - addr &= ~3; v = __raw_readl(addr); break; } @@ -140,7 +140,7 @@ static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int wh static int versatile_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - unsigned long addr = __pci_addr(bus, devfn, where); + void __iomem *addr = __pci_addr(bus, devfn, where); int slot = PCI_SLOT(devfn); if (pci_slot_ignore & (1 << slot)) { @@ -279,7 +279,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) printk("PCI core found (slot %d)\n",myslot); __raw_writel(myslot, PCI_SELFID); - local_pci_cfg_base = (void *) VERSATILE_PCI_CFG_VIRT_BASE + (myslot << 11); + local_pci_cfg_base = VERSATILE_PCI_CFG_VIRT_BASE + (myslot << 11); val = __raw_readl(local_pci_cfg_base + CSR_OFFSET); val |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE; diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 591fc3187c7f..465440592791 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -361,14 +361,14 @@ __ioremap(unsigned long phys_addr, size_t size, unsigned long flags) } EXPORT_SYMBOL(__ioremap); -void __iounmap(void __iomem *addr) +void __iounmap(volatile void __iomem *addr) { #ifndef CONFIG_SMP struct vm_struct **p, *tmp; #endif unsigned int section_mapping = 0; - addr = (void __iomem *)(PAGE_MASK & (unsigned long)addr); + addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long)addr); #ifndef CONFIG_SMP /* @@ -395,6 +395,6 @@ void __iounmap(void __iomem *addr) #endif if (!section_mapping) - vunmap(addr); + vunmap((void __force *)addr); } EXPORT_SYMBOL(__iounmap); diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c index 726ad2b3b435..7899d3ca75a3 100644 --- a/arch/arm/oprofile/op_model_xscale.c +++ b/arch/arm/oprofile/op_model_xscale.c @@ -341,7 +341,7 @@ static void inline __xsc2_check_ctrs(void) __asm__ __volatile__ ("mcr p14, 0, %0, c5, c1, 0" : : "r" (flag)); } -static irqreturn_t xscale_pmu_interrupt(int irq, void *arg, struct pt_regs *regs) +static irqreturn_t xscale_pmu_interrupt(int irq, void *arg) { int i; u32 pmnc; @@ -356,7 +356,7 @@ static irqreturn_t xscale_pmu_interrupt(int irq, void *arg, struct pt_regs *regs continue; write_counter(i, -(u32)results[i].reset_counter); - oprofile_add_sample(regs, i); + oprofile_add_sample(get_irq_regs(), i); results[i].ovf--; } diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 06282dffbdc6..f530abdaa7a1 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -47,7 +47,7 @@ unsigned long iop3xx_gettimeoffset(void) } static irqreturn_t -iop3xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +iop3xx_timer_interrupt(int irq, void *dev_id) { write_seqlock(&xtime_lock); @@ -57,7 +57,7 @@ iop3xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) while ((signed long)(next_jiffy_time - *IOP3XX_TU_TCR1) >= ticks_per_jiffy) { - timer_tick(regs); + timer_tick(); next_jiffy_time -= ticks_per_jiffy; } diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index 1bbb431843ce..bb045e5ddbd8 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -899,8 +899,7 @@ static int omap1_dma_handle_ch(int ch) return 1; } -static irqreturn_t omap1_dma_irq_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap1_dma_irq_handler(int irq, void *dev_id) { int ch = ((int) dev_id) - 1; int handled = 0; @@ -962,8 +961,7 @@ static int omap2_dma_handle_ch(int ch) } /* STATUS register count is from 1-32 while our is 0-31 */ -static irqreturn_t omap2_dma_irq_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap2_dma_irq_handler(int irq, void *dev_id) { u32 val; int i; @@ -1220,8 +1218,7 @@ static void set_b1_regs(void) omap_writew(fi, OMAP1610_DMA_LCD_SRC_FI_B1_L); } -static irqreturn_t lcd_dma_irq_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t lcd_dma_irq_handler(int irq, void *dev_id) { u16 w; diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index f55f99ae58ae..8162eed8b500 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -783,8 +783,7 @@ void omap_free_gpio(int gpio) * line's interrupt handler has been run, we may miss some nested * interrupts. */ -static void gpio_irq_handler(unsigned int irq, struct irqdesc *desc, - struct pt_regs *regs) +static void gpio_irq_handler(unsigned int irq, struct irqdesc *desc) { void __iomem *isr_reg = NULL; u32 isr; @@ -882,7 +881,7 @@ static void gpio_irq_handler(unsigned int irq, struct irqdesc *desc, continue; } - desc_handle_irq(gpio_irq, d, regs); + desc_handle_irq(gpio_irq, d); if (unlikely((d->status & IRQ_PENDING) && !d->depth)) { irq_mask = 1 << diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index ade9a0fa6ef6..ec50008a2df6 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -96,7 +96,7 @@ static void omap_mcbsp_dump_reg(u8 id) DBG("***********************\n"); } -static irqreturn_t omap_mcbsp_tx_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t omap_mcbsp_tx_irq_handler(int irq, void *dev_id) { struct omap_mcbsp * mcbsp_tx = (struct omap_mcbsp *)(dev_id); @@ -106,7 +106,7 @@ static irqreturn_t omap_mcbsp_tx_irq_handler(int irq, void *dev_id, struct pt_re return IRQ_HANDLED; } -static irqreturn_t omap_mcbsp_rx_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t omap_mcbsp_rx_irq_handler(int irq, void *dev_id) { struct omap_mcbsp * mcbsp_rx = (struct omap_mcbsp *)(dev_id); diff --git a/arch/arm/plat-omap/timer32k.c b/arch/arm/plat-omap/timer32k.c index cf6df3378d37..265310601161 100644 --- a/arch/arm/plat-omap/timer32k.c +++ b/arch/arm/plat-omap/timer32k.c @@ -194,8 +194,7 @@ unsigned long long sched_clock(void) * issues with dynamic tick. In the dynamic tick case, we need to lock * with irqsave. */ -static inline irqreturn_t _omap_32k_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static inline irqreturn_t _omap_32k_timer_interrupt(int irq, void *dev_id) { unsigned long now; @@ -205,7 +204,7 @@ static inline irqreturn_t _omap_32k_timer_interrupt(int irq, void *dev_id, while ((signed long)(now - omap_32k_last_tick) >= OMAP_32K_TICKS_PER_HZ) { omap_32k_last_tick += OMAP_32K_TICKS_PER_HZ; - timer_tick(regs); + timer_tick(); } /* Restart timer so we don't drift off due to modulo or dynamic tick. @@ -218,19 +217,17 @@ static inline irqreturn_t _omap_32k_timer_interrupt(int irq, void *dev_id, return IRQ_HANDLED; } -static irqreturn_t omap_32k_timer_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap_32k_timer_handler(int irq, void *dev_id) { - return _omap_32k_timer_interrupt(irq, dev_id, regs); + return _omap_32k_timer_interrupt(irq, dev_id); } -static irqreturn_t omap_32k_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t omap_32k_timer_interrupt(int irq, void *dev_id) { unsigned long flags; write_seqlock_irqsave(&xtime_lock, flags); - _omap_32k_timer_interrupt(irq, dev_id, regs); + _omap_32k_timer_interrupt(irq, dev_id); write_sequnlock_irqrestore(&xtime_lock, flags); return IRQ_HANDLED; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index dedbb449632e..a657a28f08db 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -90,7 +90,7 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) info.si_signo = SIGFPE; info.si_code = sicode; - info.si_addr = (void *)(instruction_pointer(regs) - 4); + info.si_addr = (void __user *)(instruction_pointer(regs) - 4); /* * This is the same as NWFPE, because it's not clear what diff --git a/arch/arm26/kernel/armksyms.c b/arch/arm26/kernel/armksyms.c index 07907b6ecb63..93293d04b303 100644 --- a/arch/arm26/kernel/armksyms.c +++ b/arch/arm26/kernel/armksyms.c @@ -202,14 +202,6 @@ EXPORT_SYMBOL(_find_next_zero_bit_le); EXPORT_SYMBOL(elf_platform); EXPORT_SYMBOL(elf_hwcap); - /* syscalls */ -EXPORT_SYMBOL(sys_write); -EXPORT_SYMBOL(sys_read); -EXPORT_SYMBOL(sys_lseek); -EXPORT_SYMBOL(sys_open); -EXPORT_SYMBOL(sys_exit); -EXPORT_SYMBOL(sys_wait4); - #ifdef CONFIG_PREEMPT EXPORT_SYMBOL(kernel_flag); #endif diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 3e56b9f4358a..5a247ba71a72 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -124,15 +124,15 @@ unsigned long long sched_clock(void) * * In UP mode, it is invoked from the (global) timer_interrupt. */ -static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static void local_timer_interrupt(int irq, void *dev_id) { if (current->pid) - profile_tick(CPU_PROFILING, regs); - update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING); + update_process_times(user_mode(get_irq_regs())); } static irqreturn_t -timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +timer_interrupt(int irq, void *dev_id) { unsigned int count; @@ -157,7 +157,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * * SMP is not supported yet. */ - local_timer_interrupt(irq, dev_id, regs); + local_timer_interrupt(irq, dev_id); return IRQ_HANDLED; } diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c index 7da9c5f7a0eb..4dff1f988900 100644 --- a/arch/avr32/mach-at32ap/extint.c +++ b/arch/avr32/mach-at32ap/extint.c @@ -102,8 +102,7 @@ struct irq_chip eim_chip = { .set_type = eim_set_irq_type, }; -static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +static void demux_eim_irq(unsigned int irq, struct irq_desc *desc) { struct at32_sm *sm = desc->handler_data; struct irq_desc *ext_desc; @@ -121,7 +120,7 @@ static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, ext_irq = i + sm->eim_first_irq; ext_desc = irq_desc + ext_irq; - ext_desc->handle_irq(ext_irq, ext_desc, regs); + ext_desc->handle_irq(ext_irq, ext_desc); } spin_unlock(&sm->lock); diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c index 74f8c9f2f03d..eb87a18ad7b2 100644 --- a/arch/avr32/mach-at32ap/intc.c +++ b/arch/avr32/mach-at32ap/intc.c @@ -52,16 +52,19 @@ static struct intc intc0 = { asmlinkage void do_IRQ(int level, struct pt_regs *regs) { struct irq_desc *desc; + struct pt_regs *old_regs; unsigned int irq; unsigned long status_reg; local_irq_disable(); + old_regs = set_irq_regs(regs); + irq_enter(); irq = intc_readl(&intc0, INTCAUSE0 - 4 * level); desc = irq_desc + irq; - desc->handle_irq(irq, desc, regs); + desc->handle_irq(irq, desc); /* * Clear all interrupt level masks so that we may handle @@ -75,6 +78,8 @@ asmlinkage void do_IRQ(int level, struct pt_regs *regs) sysreg_write(SR, status_reg); irq_exit(); + + set_irq_regs(old_regs); } void __init init_IRQ(void) diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index 21c9a4e71104..fc4f2abccf06 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -7,6 +7,7 @@ choice config M386 bool "386" + depends on !UML ---help--- This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel that can run on @@ -301,7 +302,7 @@ config X86_USE_PPRO_CHECKSUM config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 || MGEODE_LX + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML default y config X86_OOSTORE diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 7d500da0e63b..2fd4b7d927c2 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1197,7 +1197,7 @@ inline void smp_local_timer_interrupt(void) { profile_tick(CPU_PROFILING); #ifdef CONFIG_SMP - update_process_times(user_mode_vm(irq_regs)); + update_process_times(user_mode_vm(get_irq_regs())); #endif /* diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index b7287fb499f3..27bceaf5ce40 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1184,8 +1184,8 @@ static int __assign_irq_vector(int irq) BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); - if (IO_APIC_VECTOR(irq) > 0) - return IO_APIC_VECTOR(irq); + if (irq_vector[irq] > 0) + return irq_vector[irq]; current_vector += 8; if (current_vector == SYSCALL_VECTOR) @@ -1199,7 +1199,7 @@ static int __assign_irq_vector(int irq) } vector = current_vector; - IO_APIC_VECTOR(irq) = vector; + irq_vector[irq] = vector; return vector; } @@ -1967,7 +1967,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro */ - i = IO_APIC_VECTOR(irq); + i = irq_vector[irq]; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); @@ -1984,7 +1984,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(IO_APIC_VECTOR(irq)); + send_IPI_self(irq_vector[irq]); return 1; } @@ -2020,7 +2020,7 @@ static inline void init_IO_APIC_traps(void) */ for (irq = 0; irq < NR_IRQS ; irq++) { int tmp = irq; - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2594,7 +2594,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) } #endif -static struct hw_interrupt_type ht_irq_chip = { +static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .mask = mask_ht_irq, .unmask = unmask_ht_irq, diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 9b9479768d5e..c4d0291b519f 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -656,14 +656,18 @@ static struct attribute_group mc_attr_group = { static int mc_sysdev_add(struct sys_device *sys_dev) { - int cpu = sys_dev->id; + int err, cpu = sys_dev->id; struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!cpu_online(cpu)) return 0; + pr_debug("Microcode:CPU %d added\n", cpu); memset(uci, 0, sizeof(*uci)); - sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; microcode_init_cpu(cpu); return 0; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 000cf03751fe..519e63c3c130 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1083,16 +1083,15 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM - unsigned long max_zone_pfns[MAX_NR_ZONES] = { - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, - max_low_pfn, - highend_pfn}; + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; add_active_range(0, 0, highend_pfn); #else - unsigned long max_zone_pfns[MAX_NR_ZONES] = { - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, - max_low_pfn}; add_active_range(0, 0, max_low_pfn); #endif diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 7e639f78b0b9..2697e9210e92 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -318,3 +318,4 @@ ENTRY(sys_call_table) .long sys_vmsplice .long sys_move_pages .long sys_getcpu + .long sys_epoll_pwait diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 3f221f5eb47e..78af572fd17c 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -201,8 +201,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) high bit of the PPI port B (0x61). Note that some PS/2s, notably the 55SX, work fine if this is removed. */ - irq = inb_p( 0x61 ); /* read the current state */ - outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ + u8 irq_v = inb_p( 0x61 ); /* read the current state */ + outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ } write_sequnlock(&xtime_lock); diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index 08502fc6d0cb..258df6b4d7d7 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -179,7 +179,7 @@ __clear_user(void __user *to, unsigned long n) EXPORT_SYMBOL(__clear_user); /** - * strlen_user: - Get the size of a string in user space. + * strnlen_user: - Get the size of a string in user space. * @s: The string to measure. * @n: The maximum valid length * diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 455597db84df..ddbdb0336f28 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -356,11 +356,12 @@ void __init numa_kva_reserve(void) void __init zone_sizes_init(void) { int nid; - unsigned long max_zone_pfns[MAX_NR_ZONES] = { - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, - max_low_pfn, - highend_pfn - }; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; /* If SRAT has not registered memory, register it now */ if (find_max_pfn_with_active_regions() == 0) { diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 47f02af74be3..dbc4aae91959 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -1141,10 +1141,6 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { -#ifdef CONFIG_PCI_MSI - if (!platform_legacy_irq(irq)) - irq = IO_APIC_VECTOR(irq); -#endif printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", pci_name(dev), 'A' + pin, irq); dev->irq = irq; diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index e1a1b11473e2..424e9257c9a0 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -54,7 +54,7 @@ static int simeth_close(struct net_device *dev); static int simeth_tx(struct sk_buff *skb, struct net_device *dev); static int simeth_rx(struct net_device *dev); static struct net_device_stats *simeth_get_stats(struct net_device *dev); -static irqreturn_t simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs); +static irqreturn_t simeth_interrupt(int irq, void *dev_id); static void set_multicast_list(struct net_device *dev); static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr); @@ -87,7 +87,7 @@ static int simeth_debug; /* set to 1 to get debug information */ */ static struct notifier_block simeth_dev_notifier = { simeth_device_event, - 0 + NULL }; @@ -497,7 +497,7 @@ simeth_rx(struct net_device *dev) * Interrupt handler (Yes, we can do it too !!!) */ static irqreturn_t -simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs) +simeth_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 8f0a16a79a67..bb87682bbb1b 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -103,7 +103,7 @@ simscsi_interrupt (unsigned long val) while ((sc = queue[rd].sc) != 0) { atomic_dec(&num_reqs); - queue[rd].sc = 0; + queue[rd].sc = NULL; if (DBG) printk("simscsi_interrupt: done with %ld\n", sc->serial_number); (*sc->scsi_done)(sc); diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 246eb3d3757a..caab986af70c 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -92,7 +92,7 @@ static struct serial_uart_config uart_config[] = { { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO | UART_STARTECH }, { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO}, - { 0, 0} + { NULL, 0} }; struct tty_driver *hp_simserial_driver; @@ -130,7 +130,7 @@ static void rs_start(struct tty_struct *tty) #endif } -static void receive_chars(struct tty_struct *tty, struct pt_regs *regs) +static void receive_chars(struct tty_struct *tty) { unsigned char ch; static unsigned char seen_esc = 0; @@ -152,7 +152,7 @@ static void receive_chars(struct tty_struct *tty, struct pt_regs *regs) ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR); while (!ch); - handle_sysrq(ch, regs, NULL); + handle_sysrq(ch, NULL); } #endif seen_esc = 0; @@ -170,7 +170,7 @@ static void receive_chars(struct tty_struct *tty, struct pt_regs *regs) /* * This is the serial driver's interrupt routine for a single port */ -static irqreturn_t rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs) +static irqreturn_t rs_interrupt_single(int irq, void *dev_id) { struct async_struct * info; @@ -187,7 +187,7 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id, struct pt_regs * r * pretty simple in our case, because we only get interrupts * on inbound traffic */ - receive_chars(info->tty, regs); + receive_chars(info->tty); return IRQ_HANDLED; } @@ -555,7 +555,7 @@ static void shutdown(struct async_struct * info) if (info->xmit.buf) { free_page((unsigned long) info->xmit.buf); - info->xmit.buf = 0; + info->xmit.buf = NULL; } if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); @@ -628,7 +628,7 @@ static void rs_close(struct tty_struct *tty, struct file * filp) if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty); if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); info->event = 0; - info->tty = 0; + info->tty = NULL; if (info->blocked_open) { if (info->close_delay) schedule_timeout_interruptible(info->close_delay); @@ -668,7 +668,7 @@ static void rs_hangup(struct tty_struct *tty) info->event = 0; state->count = 0; info->flags &= ~ASYNC_NORMAL_ACTIVE; - info->tty = 0; + info->tty = NULL; wake_up_interruptible(&info->open_wait); } @@ -714,7 +714,7 @@ startup(struct async_struct *info) { unsigned long flags; int retval=0; - irqreturn_t (*handler)(int, void *, struct pt_regs *); + irq_handler_t handler; struct serial_state *state= info->state; unsigned long page; @@ -769,7 +769,7 @@ startup(struct async_struct *info) /* * Insert serial port into IRQ chain. */ - info->prev_port = 0; + info->prev_port = NULL; info->next_port = IRQ_ports[state->irq]; if (info->next_port) info->next_port->prev_port = info; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 7852382de2fa..f07c0864b0b4 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -194,8 +194,11 @@ void fixup_irqs(void) */ for (irq=0; irq < NR_IRQS; irq++) { if (vectors_in_migration[irq]) { + struct pt_regs *old_regs = set_irq_regs(NULL); + vectors_in_migration[irq]=0; - __do_IRQ(irq, NULL); + __do_IRQ(irq); + set_irq_regs(old_regs); } } diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 7fd3ef9e064d..68339dd0c9e2 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -226,6 +226,8 @@ void ia64_process_pending_intr(void) */ while (vector != IA64_SPURIOUS_INT_VECTOR) { if (!IS_RESCHEDULE(vector)) { + struct pt_regs *old_regs = set_irq_regs(NULL); + ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); @@ -236,7 +238,8 @@ void ia64_process_pending_intr(void) * Probably could shared code. */ vectors_in_migration[local_vector_to_irq(vector)]=0; - __do_IRQ(local_vector_to_irq(vector), NULL); + __do_IRQ(local_vector_to_irq(vector)); + set_irq_regs(old_regs); /* * Disable interrupts and send EOI @@ -253,7 +256,7 @@ void ia64_process_pending_intr(void) #ifdef CONFIG_SMP -extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs); +extern irqreturn_t handle_IPI (int irq, void *dev_id); static struct irqaction ipi_irqaction = { .handler = handle_IPI, diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index daf977ff2920..82deaa3a7c48 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -233,6 +233,7 @@ paging_init (void) efi_memmap_walk(count_pages, &num_physpages); max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = max_dma; max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index d497b6b0f5b2..96722cb1b49d 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -709,6 +709,7 @@ void __init paging_init(void) max_pfn = mem_data[node].max_pfn; } + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = max_dma; max_zone_pfns[ZONE_NORMAL] = max_pfn; free_area_init_nodes(max_zone_pfns); diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 96fb81e6321f..abca6bd7962f 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -22,7 +22,7 @@ void hubiio_crb_error_handler(struct hubdev_info *hubdev_info); extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *, int); -static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep) +static irqreturn_t hub_eint_handler(int irq, void *arg) { struct hubdev_info *hubdev_info; struct ia64_sal_retval ret_stuff; @@ -178,7 +178,7 @@ void hubiio_crb_error_handler(struct hubdev_info *hubdev_info) */ void hub_error_init(struct hubdev_info *hubdev_info) { - if (request_irq(SGI_II_ERROR, (void *)hub_eint_handler, IRQF_SHARED, + if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED, "SN_hub_error", (void *)hubdev_info)) printk("hub_error_init: Failed to request_irq for 0x%p\n", hubdev_info); diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c index fa7f69945917..103d6ea8e94b 100644 --- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -36,7 +36,7 @@ extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); #define SN_LB_INT_WAR_INTERVAL 100 -void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +void sn_timer_interrupt(int irq, void *dev_id) { /* LED blinking */ if (!pda->hb_count--) { diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 4d026f9dd98b..fa96dfc0e1aa 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -222,7 +222,7 @@ xpc_timeout_partition_disengage_request(unsigned long data) * Notify the heartbeat check thread that an IRQ has been received. */ static irqreturn_t -xpc_act_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs) +xpc_act_IRQ_handler(int irq, void *dev_id) { atomic_inc(&xpc_act_IRQ_rcvd); wake_up_interruptible(&xpc_act_IRQ_wq); @@ -607,12 +607,9 @@ xpc_activate_partition(struct xpc_partition *part) * irq - Interrupt ReQuest number. NOT USED. * * dev_id - partid of IPI's potential sender. - * - * regs - processor's context before the processor entered - * interrupt code. NOT USED. */ irqreturn_t -xpc_notify_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs) +xpc_notify_IRQ_handler(int irq, void *dev_id) { partid_t partid = (partid_t) (u64) dev_id; struct xpc_partition *part = &xpc_partitions[partid]; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 5eb1e1e078b4..935029fc400d 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -126,7 +126,7 @@ int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count) * Setup an Address Translation Entry as specified. Use either the Bridge * internal maps or the external map RAM, as appropriate. */ -static inline u64 *pcibr_ate_addr(struct pcibus_info *pcibus_info, +static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info, int ate_index) { if (ate_index < pcibus_info->pbi_int_ate_size) { diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 838c93c9a16a..27dd7df0f446 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -95,7 +95,7 @@ u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus) * bridge sends an error interrupt. */ static irqreturn_t -pcibr_error_intr_handler(int irq, void *arg, struct pt_regs *regs) +pcibr_error_intr_handler(int irq, void *arg) { struct pcibus_info *soft = (struct pcibus_info *)arg; @@ -138,7 +138,7 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont /* * register the bridge's error interrupt handler */ - if (request_irq(SGI_PCIASIC_ERROR, (void *)pcibr_error_intr_handler, + if (request_irq(SGI_PCIASIC_ERROR, pcibr_error_intr_handler, IRQF_SHARED, "PCIBR error", (void *)(soft))) { printk(KERN_WARNING "pcibr cannot allocate interrupt for error handler\n"); diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 0e81f68aaf8e..46e16dcf5971 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -53,7 +53,7 @@ */ static void inline -tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr) +tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr) { u64 mmr_base; u64 mmr_offset; @@ -62,7 +62,7 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr) return; mmr_base = kern->ce_common->ce_pcibus.bs_base; - mmr_offset = (u64)mmr_addr - mmr_base; + mmr_offset = (unsigned long)mmr_addr - mmr_base; if (mmr_offset < 0x45000) { u64 mmr_war_offset; @@ -79,7 +79,7 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr) } static void inline -tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) +tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr) { u64 mmr_base; u64 mmr_offset; @@ -88,7 +88,7 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) return; mmr_base = kern->ce_common->ce_pcibus.bs_base; - mmr_offset = (u64)mmr_addr - mmr_base; + mmr_offset = (unsigned long)mmr_addr - mmr_base; if (mmr_offset < 0x45000) { if (mmr_offset == 0x100) @@ -223,7 +223,7 @@ tioce_dma_d64(unsigned long ct_addr, int dma_flags) * @pci_dev. */ static inline void -pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base, +pcidev_to_tioce(struct pci_dev *pdev, struct tioce __iomem **base, struct tioce_kernel **kernel, int *port) { struct pcidev_info *pcidev_info; @@ -235,7 +235,7 @@ pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base, ce_kernel = (struct tioce_kernel *)ce_common->ce_kernel_private; if (base) - *base = (struct tioce *)ce_common->ce_pcibus.bs_base; + *base = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; if (kernel) *kernel = ce_kernel; @@ -275,13 +275,13 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, u64 pagesize; int msi_capable, msi_wanted; u64 *ate_shadow; - u64 *ate_reg; + u64 __iomem *ate_reg; u64 addr; - struct tioce *ce_mmr; + struct tioce __iomem *ce_mmr; u64 bus_base; struct tioce_dmamap *map; - ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; + ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base; switch (type) { case TIOCE_ATE_M32: @@ -386,7 +386,7 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr, int dma_flags) { int dma_ok; int port; - struct tioce *ce_mmr; + struct tioce __iomem *ce_mmr; struct tioce_kernel *ce_kern; u64 ct_upper; u64 ct_lower; @@ -461,7 +461,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) int i; int port; struct tioce_kernel *ce_kern; - struct tioce *ce_mmr; + struct tioce __iomem *ce_mmr; unsigned long flags; bus_addr = tioce_dma_barrier(bus_addr, 0); @@ -700,9 +700,9 @@ static void tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) { int ate_index, last_ate, ps; - struct tioce *ce_mmr; + struct tioce __iomem *ce_mmr; - ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; + ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base; ps = ce_kern->ce_ate3240_pagesize; ate_index = ATE_PAGE(base, ps); last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1; @@ -736,7 +736,7 @@ tioce_kern_init(struct tioce_common *tioce_common) int dev; u32 tmp; unsigned int seg, bus; - struct tioce *tioce_mmr; + struct tioce __iomem *tioce_mmr; struct tioce_kernel *tioce_kern; tioce_kern = kzalloc(sizeof(struct tioce_kernel), GFP_KERNEL); @@ -767,7 +767,7 @@ tioce_kern_init(struct tioce_common *tioce_common) * the ate's. */ - tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; + tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base; tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK); tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map, @@ -858,7 +858,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) struct pcidev_info *pcidev_info; struct tioce_common *ce_common; struct tioce_kernel *ce_kern; - struct tioce *ce_mmr; + struct tioce __iomem *ce_mmr; u64 force_int_val; if (!sn_irq_info->irq_bridge) @@ -872,7 +872,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) return; ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; - ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; /* @@ -953,7 +953,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) struct pcidev_info *pcidev_info; struct tioce_common *ce_common; struct tioce_kernel *ce_kern; - struct tioce *ce_mmr; + struct tioce __iomem *ce_mmr; int bit; u64 vector; @@ -962,7 +962,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) return; ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; - ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; bit = sn_irq_info->irq_int_bit; @@ -994,7 +994,7 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont cnodeid_t my_cnode, mem_cnode; struct tioce_common *tioce_common; struct tioce_kernel *tioce_kern; - struct tioce *tioce_mmr; + struct tioce __iomem *tioce_mmr; /* * Allocate kernel bus soft and copy from prom. @@ -1018,7 +1018,7 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont * interrupt handler. */ - tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; + tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base; tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, ~0ULL); diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index 3841861df6a2..f8d8650383e0 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c @@ -77,13 +77,16 @@ skip: */ asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) { + struct pt_regs *old_regs; + old_regs = set_irq_regs(regs); irq_enter(); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* FIXME M32R */ #endif - __do_IRQ(irq, regs); + __do_IRQ(irq); irq_exit(); + set_irq_regs(old_regs); return 1; } diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 3f35ab3d2dc2..0e7778be33cc 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -369,10 +369,10 @@ static void c_stop(struct seq_file *m, void *v) } struct seq_operations cpuinfo_op = { - start: c_start, - next: c_next, - stop: c_stop, - show: show_cpuinfo, + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, }; #endif /* CONFIG_PROC_FS */ diff --git a/arch/m32r/kernel/setup_mappi.c b/arch/m32r/kernel/setup_mappi.c index 67dbbdc9d111..6b2d77da0683 100644 --- a/arch/m32r/kernel/setup_mappi.c +++ b/arch/m32r/kernel/setup_mappi.c @@ -86,7 +86,7 @@ void __init init_IRQ(void) /* INT0 : LAN controller (RTL8019AS) */ irq_desc[M32R_IRQ_INT0].status = IRQ_DISABLED; irq_desc[M32R_IRQ_INT0].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_INT0].action = 0; + irq_desc[M32R_IRQ_INT0].action = NULL; irq_desc[M32R_IRQ_INT0].depth = 1; icu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10; disable_mappi_irq(M32R_IRQ_INT0); @@ -95,7 +95,7 @@ void __init init_IRQ(void) /* MFT2 : system timer */ irq_desc[M32R_IRQ_MFT2].status = IRQ_DISABLED; irq_desc[M32R_IRQ_MFT2].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_MFT2].action = 0; + irq_desc[M32R_IRQ_MFT2].action = NULL; irq_desc[M32R_IRQ_MFT2].depth = 1; icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN; disable_mappi_irq(M32R_IRQ_MFT2); @@ -104,7 +104,7 @@ void __init init_IRQ(void) /* SIO0_R : uart receive data */ irq_desc[M32R_IRQ_SIO0_R].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO0_R].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO0_R].action = 0; + irq_desc[M32R_IRQ_SIO0_R].action = NULL; irq_desc[M32R_IRQ_SIO0_R].depth = 1; icu_data[M32R_IRQ_SIO0_R].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO0_R); @@ -112,7 +112,7 @@ void __init init_IRQ(void) /* SIO0_S : uart send data */ irq_desc[M32R_IRQ_SIO0_S].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO0_S].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO0_S].action = 0; + irq_desc[M32R_IRQ_SIO0_S].action = NULL; irq_desc[M32R_IRQ_SIO0_S].depth = 1; icu_data[M32R_IRQ_SIO0_S].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO0_S); @@ -120,7 +120,7 @@ void __init init_IRQ(void) /* SIO1_R : uart receive data */ irq_desc[M32R_IRQ_SIO1_R].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO1_R].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO1_R].action = 0; + irq_desc[M32R_IRQ_SIO1_R].action = NULL; irq_desc[M32R_IRQ_SIO1_R].depth = 1; icu_data[M32R_IRQ_SIO1_R].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO1_R); @@ -128,7 +128,7 @@ void __init init_IRQ(void) /* SIO1_S : uart send data */ irq_desc[M32R_IRQ_SIO1_S].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO1_S].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO1_S].action = 0; + irq_desc[M32R_IRQ_SIO1_S].action = NULL; irq_desc[M32R_IRQ_SIO1_S].depth = 1; icu_data[M32R_IRQ_SIO1_S].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO1_S); @@ -138,7 +138,7 @@ void __init init_IRQ(void) /* INT1 : pccard0 interrupt */ irq_desc[M32R_IRQ_INT1].status = IRQ_DISABLED; irq_desc[M32R_IRQ_INT1].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_INT1].action = 0; + irq_desc[M32R_IRQ_INT1].action = NULL; irq_desc[M32R_IRQ_INT1].depth = 1; icu_data[M32R_IRQ_INT1].icucr = M32R_ICUCR_IEN | M32R_ICUCR_ISMOD00; disable_mappi_irq(M32R_IRQ_INT1); @@ -146,7 +146,7 @@ void __init init_IRQ(void) /* INT2 : pccard1 interrupt */ irq_desc[M32R_IRQ_INT2].status = IRQ_DISABLED; irq_desc[M32R_IRQ_INT2].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_INT2].action = 0; + irq_desc[M32R_IRQ_INT2].action = NULL; irq_desc[M32R_IRQ_INT2].depth = 1; icu_data[M32R_IRQ_INT2].icucr = M32R_ICUCR_IEN | M32R_ICUCR_ISMOD00; disable_mappi_irq(M32R_IRQ_INT2); diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index a9174efe80cb..b60cea4aebaa 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -33,7 +33,7 @@ int do_signal(struct pt_regs *, sigset_t *); asmlinkage int -sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, +sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, unsigned long r2, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, struct pt_regs *regs) { @@ -78,8 +78,8 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct rt_sigframe { int sig; - struct siginfo *pinfo; - void *puc; + struct siginfo __user *pinfo; + void __user *puc; struct siginfo info; struct ucontext uc; // struct _fpstate fpstate; diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 8b1f6eb76870..360129174b2b 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -101,7 +101,7 @@ void smp_call_function_interrupt(void); void smp_send_timer(void); void smp_ipi_timer_interrupt(struct pt_regs *); -void smp_local_timer_interrupt(struct pt_regs *); +void smp_local_timer_interrupt(void); void send_IPI_allbutself(int, int); static void send_IPI_mask(cpumask_t, int, int); @@ -231,7 +231,7 @@ void smp_flush_tlb_all(void) local_irq_save(flags); __flush_tlb_all(); local_irq_restore(flags); - smp_call_function(flush_tlb_all_ipi, 0, 1, 1); + smp_call_function(flush_tlb_all_ipi, NULL, 1, 1); preempt_enable(); } @@ -734,9 +734,12 @@ void smp_send_timer(void) *==========================================================================*/ void smp_ipi_timer_interrupt(struct pt_regs *regs) { + struct pt_regs *old_regs; + old_regs = set_irq_regs(regs); irq_enter(); - smp_local_timer_interrupt(regs); + smp_local_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); } /*==========================================================================* @@ -762,9 +765,9 @@ void smp_ipi_timer_interrupt(struct pt_regs *regs) * ---------- --- -------------------------------------------------------- * 2003-06-24 hy use per_cpu structure. *==========================================================================*/ -void smp_local_timer_interrupt(struct pt_regs *regs) +void smp_local_timer_interrupt(void) { - int user = user_mode(regs); + int user = user_mode(get_irq_regs()); int cpu_id = smp_processor_id(); /* @@ -774,7 +777,7 @@ void smp_local_timer_interrupt(struct pt_regs *regs) * useful with a profiling multiplier != 1 */ - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); if (--per_cpu(prof_counter, cpu_id) <= 0) { /* diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index b567351f3c52..b4e7bcb43540 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -31,7 +31,7 @@ /* * sys_tas() - test-and-set */ -asmlinkage int sys_tas(int *addr) +asmlinkage int sys_tas(int __user *addr) { int oldval; @@ -90,7 +90,7 @@ sys_pipe(unsigned long r0, unsigned long r1, unsigned long r2, error = do_pipe(fd); if (!error) { - if (copy_to_user((void *)r0, (void *)fd, 2*sizeof(int))) + if (copy_to_user((void __user *)r0, fd, 2*sizeof(int))) error = -EFAULT; } return error; @@ -201,7 +201,7 @@ asmlinkage int sys_ipc(uint call, int first, int second, } } -asmlinkage int sys_uname(struct old_utsname * name) +asmlinkage int sys_uname(struct old_utsname __user * name) { int err; if (!name) diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c index d8af155db984..a09038282c78 100644 --- a/arch/m32r/kernel/time.c +++ b/arch/m32r/kernel/time.c @@ -35,7 +35,7 @@ #ifdef CONFIG_SMP extern void send_IPI_allbutself(int, int); -extern void smp_local_timer_interrupt(struct pt_regs *); +extern void smp_local_timer_interrupt(void); #endif #define TICK_SIZE (tick_nsec / 1000) @@ -188,15 +188,15 @@ static long last_rtc_update = 0; * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t timer_interrupt(int irq, void *dev_id) { #ifndef CONFIG_SMP - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); #endif do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif /* * If we have an externally synchronized Linux clock, then update @@ -221,7 +221,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) a hack, so don't look closely for now.. */ #ifdef CONFIG_SMP - smp_local_timer_interrupt(regs); + smp_local_timer_interrupt(); smp_send_timer(); #endif diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index c1daf2c40c7c..97e0b1c0830e 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -268,7 +268,7 @@ static __inline__ void do_trap(int trapnr, int signr, const char * str, #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ - do_trap(trapnr, signr, 0, regs, error_code, NULL); \ + do_trap(trapnr, signr, NULL, regs, error_code, NULL); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 805b81fedf80..7bc14461a6ac 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -600,7 +600,7 @@ config MVME147_SCC config SERIAL167 bool "CD2401 support for MVME166/7 serial ports" - depends on MVME16x && BROKEN + depends on MVME16x help This is the driver for the serial ports on the Motorola MVME166, 167, and 172 boards. Everyone using one of these boards should say diff --git a/arch/m68k/amiga/amiints.c b/arch/m68k/amiga/amiints.c index 96c79d840cff..28d95cfe8ac0 100644 --- a/arch/m68k/amiga/amiints.c +++ b/arch/m68k/amiga/amiints.c @@ -47,10 +47,10 @@ static void amiga_enable_irq(unsigned int irq); static void amiga_disable_irq(unsigned int irq); -static irqreturn_t ami_int1(int irq, void *dev_id, struct pt_regs *fp); -static irqreturn_t ami_int3(int irq, void *dev_id, struct pt_regs *fp); -static irqreturn_t ami_int4(int irq, void *dev_id, struct pt_regs *fp); -static irqreturn_t ami_int5(int irq, void *dev_id, struct pt_regs *fp); +static irqreturn_t ami_int1(int irq, void *dev_id); +static irqreturn_t ami_int3(int irq, void *dev_id); +static irqreturn_t ami_int4(int irq, void *dev_id); +static irqreturn_t ami_int5(int irq, void *dev_id); static struct irq_controller amiga_irq_controller = { .name = "amiga", @@ -113,98 +113,98 @@ static void amiga_disable_irq(unsigned int irq) * The builtin Amiga hardware interrupt handlers. */ -static irqreturn_t ami_int1(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t ami_int1(int irq, void *dev_id) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; /* if serial transmit buffer empty, interrupt */ if (ints & IF_TBE) { amiga_custom.intreq = IF_TBE; - m68k_handle_int(IRQ_AMIGA_TBE, fp); + m68k_handle_int(IRQ_AMIGA_TBE); } /* if floppy disk transfer complete, interrupt */ if (ints & IF_DSKBLK) { amiga_custom.intreq = IF_DSKBLK; - m68k_handle_int(IRQ_AMIGA_DSKBLK, fp); + m68k_handle_int(IRQ_AMIGA_DSKBLK); } /* if software interrupt set, interrupt */ if (ints & IF_SOFT) { amiga_custom.intreq = IF_SOFT; - m68k_handle_int(IRQ_AMIGA_SOFT, fp); + m68k_handle_int(IRQ_AMIGA_SOFT); } return IRQ_HANDLED; } -static irqreturn_t ami_int3(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t ami_int3(int irq, void *dev_id) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; /* if a blitter interrupt */ if (ints & IF_BLIT) { amiga_custom.intreq = IF_BLIT; - m68k_handle_int(IRQ_AMIGA_BLIT, fp); + m68k_handle_int(IRQ_AMIGA_BLIT); } /* if a copper interrupt */ if (ints & IF_COPER) { amiga_custom.intreq = IF_COPER; - m68k_handle_int(IRQ_AMIGA_COPPER, fp); + m68k_handle_int(IRQ_AMIGA_COPPER); } /* if a vertical blank interrupt */ if (ints & IF_VERTB) { amiga_custom.intreq = IF_VERTB; - m68k_handle_int(IRQ_AMIGA_VERTB, fp); + m68k_handle_int(IRQ_AMIGA_VERTB); } return IRQ_HANDLED; } -static irqreturn_t ami_int4(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t ami_int4(int irq, void *dev_id) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; /* if audio 0 interrupt */ if (ints & IF_AUD0) { amiga_custom.intreq = IF_AUD0; - m68k_handle_int(IRQ_AMIGA_AUD0, fp); + m68k_handle_int(IRQ_AMIGA_AUD0); } /* if audio 1 interrupt */ if (ints & IF_AUD1) { amiga_custom.intreq = IF_AUD1; - m68k_handle_int(IRQ_AMIGA_AUD1, fp); + m68k_handle_int(IRQ_AMIGA_AUD1); } /* if audio 2 interrupt */ if (ints & IF_AUD2) { amiga_custom.intreq = IF_AUD2; - m68k_handle_int(IRQ_AMIGA_AUD2, fp); + m68k_handle_int(IRQ_AMIGA_AUD2); } /* if audio 3 interrupt */ if (ints & IF_AUD3) { amiga_custom.intreq = IF_AUD3; - m68k_handle_int(IRQ_AMIGA_AUD3, fp); + m68k_handle_int(IRQ_AMIGA_AUD3); } return IRQ_HANDLED; } -static irqreturn_t ami_int5(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t ami_int5(int irq, void *dev_id) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; /* if serial receive buffer full interrupt */ if (ints & IF_RBF) { /* acknowledge of IF_RBF must be done by the serial interrupt */ - m68k_handle_int(IRQ_AMIGA_RBF, fp); + m68k_handle_int(IRQ_AMIGA_RBF); } /* if a disk sync interrupt */ if (ints & IF_DSKSYN) { amiga_custom.intreq = IF_DSKSYN; - m68k_handle_int(IRQ_AMIGA_DSKSYN, fp); + m68k_handle_int(IRQ_AMIGA_DSKSYN); } return IRQ_HANDLED; } diff --git a/arch/m68k/amiga/cia.c b/arch/m68k/amiga/cia.c index dbad30054721..7a20058eb380 100644 --- a/arch/m68k/amiga/cia.c +++ b/arch/m68k/amiga/cia.c @@ -82,7 +82,7 @@ unsigned char cia_able_irq(struct ciabase *base, unsigned char mask) return old; } -static irqreturn_t cia_handler(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t cia_handler(int irq, void *dev_id) { struct ciabase *base = (struct ciabase *)dev_id; int mach_irq; @@ -93,7 +93,7 @@ static irqreturn_t cia_handler(int irq, void *dev_id, struct pt_regs *fp) amiga_custom.intreq = base->int_mask; for (; ints; mach_irq++, ints >>= 1) { if (ints & 1) - m68k_handle_int(mach_irq, fp); + m68k_handle_int(mach_irq); } return IRQ_HANDLED; } diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 092e50d2cb13..3204f412cad8 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -83,7 +83,7 @@ static char amiga_model_name[13] = "Amiga "; extern char m68k_debug_device[]; -static void amiga_sched_init(irqreturn_t (*handler)(int, void *, struct pt_regs *)); +static void amiga_sched_init(irq_handler_t handler); /* amiga specific irq functions */ extern void amiga_init_IRQ (void); static void amiga_get_model(char *model); @@ -487,8 +487,7 @@ void __init config_amiga(void) static unsigned short jiffy_ticks; -static void __init amiga_sched_init(irqreturn_t (*timer_routine)(int, void *, - struct pt_regs *)) +static void __init amiga_sched_init(irq_handler_t timer_routine) { static struct resource sched_res = { .name = "timer", .start = 0x00bfd400, .end = 0x00bfd5ff, diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 6f4581575fb4..cb8e7609df4c 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -25,7 +25,7 @@ u_long cpuctrl_physaddr; u_long timer_physaddr; u_long apollo_model; -extern void dn_sched_init(irqreturn_t (*handler)(int,void *,struct pt_regs *)); +extern void dn_sched_init(irq_handler_t handler); extern void dn_init_IRQ(void); extern unsigned long dn_gettimeoffset(void); extern int dn_dummy_hwclk(int, struct rtc_time *); @@ -38,7 +38,7 @@ extern irqreturn_t dn_process_int(int irq, struct pt_regs *fp); #ifdef CONFIG_HEARTBEAT static void dn_heartbeat(int on); #endif -static irqreturn_t dn_timer_int(int irq,void *, struct pt_regs *); +static irqreturn_t dn_timer_int(int irq,void *); static void dn_get_model(char *model); static const char *apollo_models[] = { [APOLLO_DN3000-APOLLO_DN3000] = "DN3000 (Otter)", @@ -174,13 +174,13 @@ void config_apollo(void) { } -irqreturn_t dn_timer_int(int irq, void *dev_id, struct pt_regs *fp) +irqreturn_t dn_timer_int(int irq, void *dev_id) { - irqreturn_t (*timer_handler)(int, void *, struct pt_regs *) = dev_id; + irq_handler_t timer_handler = dev_id; volatile unsigned char x; - timer_handler(irq, dev_id, fp); + timer_handler(irq, dev_id); x=*(volatile unsigned char *)(timer+3); x=*(volatile unsigned char *)(timer+5); @@ -188,8 +188,8 @@ irqreturn_t dn_timer_int(int irq, void *dev_id, struct pt_regs *fp) return IRQ_HANDLED; } -void dn_sched_init(irqreturn_t (*timer_routine)(int, void *, struct pt_regs *)) { - +void dn_sched_init(irq_handler_t timer_routine) +{ /* program timer 1 */ *(volatile unsigned char *)(timer+3)=0x01; *(volatile unsigned char *)(timer+1)=0x40; diff --git a/arch/m68k/apollo/dma.c b/arch/m68k/apollo/dma.c deleted file mode 100644 index aed8be177ef1..000000000000 --- a/arch/m68k/apollo/dma.c +++ /dev/null @@ -1,50 +0,0 @@ -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/kd.h> -#include <linux/tty.h> -#include <linux/console.h> - -#include <asm/setup.h> -#include <asm/bootinfo.h> -#include <asm/system.h> -#include <asm/pgtable.h> -#include <asm/apollodma.h> -#include <asm/io.h> - -/* note only works for 16 Bit 1 page DMA's */ - -static unsigned short next_free_xlat_entry=0; - -unsigned short dma_map_page(unsigned long phys_addr,int count,int type) { - - unsigned long page_aligned_addr=phys_addr & (~((1<<12)-1)); - unsigned short start_map_addr=page_aligned_addr >> 10; - unsigned short free_xlat_entry, *xlat_map_entry; - int i; - - free_xlat_entry=next_free_xlat_entry; - for(i=0,xlat_map_entry=addr_xlat_map+(free_xlat_entry<<2);i<8;i++,xlat_map_entry++) { -#if 0 - printk("phys_addr: %x, page_aligned_addr: %x, start_map_addr: %x\n",phys_addr,page_aligned_addr,start_map_addr+i); -#endif - out_be16(xlat_map_entry, start_map_addr+i); - } - - next_free_xlat_entry+=2; - if(next_free_xlat_entry>125) - next_free_xlat_entry=0; - -#if 0 - printk("next_free_xlat_entry: %d\n",next_free_xlat_entry); -#endif - - return free_xlat_entry<<10; -} - -void dma_unmap_page(unsigned short dma_addr) { - - return ; - -} - diff --git a/arch/m68k/apollo/dn_ints.c b/arch/m68k/apollo/dn_ints.c index 9fe07803797b..4274af125998 100644 --- a/arch/m68k/apollo/dn_ints.c +++ b/arch/m68k/apollo/dn_ints.c @@ -6,7 +6,7 @@ void dn_process_int(unsigned int irq, struct pt_regs *fp) { - m68k_handle_int(irq, fp); + __m68k_handle_int(irq, fp); *(volatile unsigned char *)(pica)=0x20; *(volatile unsigned char *)(picb)=0x20; diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c index ece13cbf9950..7f812641790c 100644 --- a/arch/m68k/atari/ataints.c +++ b/arch/m68k/atari/ataints.c @@ -332,6 +332,9 @@ static void atari_shutdown_irq(unsigned int irq) atari_disable_irq(irq); atari_turnoff_irq(irq); m68k_irq_shutdown(irq); + + if (irq == IRQ_AUTO_4) + vectors[VEC_INT4] = falcon_hblhandler; } static struct irq_controller atari_irq_controller = { @@ -356,7 +359,7 @@ static struct irq_controller atari_irq_controller = { void __init atari_init_IRQ(void) { - m68k_setup_user_interrupt(VEC_USER, 192, NULL); + m68k_setup_user_interrupt(VEC_USER, NUM_ATARI_SOURCES - IRQ_USER, NULL); m68k_setup_irq_controller(&atari_irq_controller, 1, NUM_ATARI_SOURCES - 1); /* Initialize the MFP(s) */ @@ -403,8 +406,10 @@ void __init atari_init_IRQ(void) * gets overruns) */ - if (!MACH_IS_HADES) + if (!MACH_IS_HADES) { vectors[VEC_INT2] = falcon_hblhandler; + vectors[VEC_INT4] = falcon_hblhandler; + } } if (ATARIHW_PRESENT(PCM_8BIT) && ATARIHW_PRESENT(MICROWIRE)) { diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index b2079252a954..ca5cd4344e3d 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -62,7 +62,7 @@ static void atari_heartbeat( int on ); #endif /* atari specific timer functions (in time.c) */ -extern void atari_sched_init(irqreturn_t (*)(int, void *, struct pt_regs *)); +extern void atari_sched_init(irq_handler_t ); extern unsigned long atari_gettimeoffset (void); extern int atari_mste_hwclk (int, struct rtc_time *); extern int atari_tt_hwclk (int, struct rtc_time *); diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c index 288f5e6a124e..d64b5804e980 100644 --- a/arch/m68k/atari/stdma.c +++ b/arch/m68k/atari/stdma.c @@ -44,7 +44,7 @@ static int stdma_locked; /* the semaphore */ /* int func to be called */ -static irqreturn_t (*stdma_isr)(int, void *, struct pt_regs *); +static irq_handler_t stdma_isr; static void *stdma_isr_data; /* data passed to isr */ static DECLARE_WAIT_QUEUE_HEAD(stdma_wait); /* wait queue for ST-DMA */ @@ -53,7 +53,7 @@ static DECLARE_WAIT_QUEUE_HEAD(stdma_wait); /* wait queue for ST-DMA */ /***************************** Prototypes *****************************/ -static irqreturn_t stdma_int (int irq, void *dummy, struct pt_regs *fp); +static irqreturn_t stdma_int (int irq, void *dummy); /************************* End of Prototypes **************************/ @@ -75,8 +75,7 @@ static irqreturn_t stdma_int (int irq, void *dummy, struct pt_regs *fp); * */ -void stdma_lock(irqreturn_t (*handler)(int, void *, struct pt_regs *), - void *data) +void stdma_lock(irq_handler_t handler, void *data) { unsigned long flags; @@ -188,9 +187,9 @@ void __init stdma_init(void) * */ -static irqreturn_t stdma_int(int irq, void *dummy, struct pt_regs *fp) +static irqreturn_t stdma_int(int irq, void *dummy) { if (stdma_isr) - (*stdma_isr)(irq, stdma_isr_data, fp); + (*stdma_isr)(irq, stdma_isr_data); return IRQ_HANDLED; } diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c index e79bbc94216d..e0d3c8bfb408 100644 --- a/arch/m68k/atari/time.c +++ b/arch/m68k/atari/time.c @@ -16,11 +16,12 @@ #include <linux/init.h> #include <linux/rtc.h> #include <linux/bcd.h> +#include <linux/delay.h> #include <asm/atariints.h> void __init -atari_sched_init(irqreturn_t (*timer_routine)(int, void *, struct pt_regs *)) +atari_sched_init(irq_handler_t timer_routine) { /* set Timer C data Register */ mfp.tim_dt_c = INT_TICKS; @@ -212,8 +213,12 @@ int atari_tt_hwclk( int op, struct rtc_time *t ) * additionally the RTC_SET bit is set to prevent an update cycle. */ - while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP ) - schedule_timeout_interruptible(HWCLK_POLL_INTERVAL); + while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP ) { + if (in_atomic() || irqs_disabled()) + mdelay(1); + else + schedule_timeout_interruptible(HWCLK_POLL_INTERVAL); + } local_irq_save(flags); RTC_WRITE( RTC_CONTROL, ctrl | RTC_SET ); diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index d1e916ae55a8..896ae3d3d919 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -38,7 +38,7 @@ static void bvme6000_get_model(char *model); static int bvme6000_get_hardware_list(char *buffer); -extern void bvme6000_sched_init(irqreturn_t (*handler)(int, void *, struct pt_regs *)); +extern void bvme6000_sched_init(irq_handler_t handler); extern unsigned long bvme6000_gettimeoffset (void); extern int bvme6000_hwclk (int, struct rtc_time *); extern int bvme6000_set_clock_mmss (unsigned long); @@ -52,7 +52,7 @@ static unsigned char bin2bcd (unsigned char b); /* Save tick handler routine pointer, will point to do_timer() in * kernel/sched.c, called via bvme6000_process_int() */ -static irqreturn_t (*tick_handler)(int, void *, struct pt_regs *); +static irq_handler_t tick_handler; int bvme6000_parse_bootinfo(const struct bi_record *bi) @@ -154,7 +154,7 @@ void __init config_bvme6000(void) } -irqreturn_t bvme6000_abort_int (int irq, void *dev_id, struct pt_regs *fp) +irqreturn_t bvme6000_abort_int (int irq, void *dev_id) { unsigned long *new = (unsigned long *)vectors; unsigned long *old = (unsigned long *)0xf8000000; @@ -171,14 +171,14 @@ irqreturn_t bvme6000_abort_int (int irq, void *dev_id, struct pt_regs *fp) } -static irqreturn_t bvme6000_timer_int (int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t bvme6000_timer_int (int irq, void *dev_id) { volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; unsigned char msr = rtc->msr & 0xc0; rtc->msr = msr | 0x20; /* Ack the interrupt */ - return tick_handler(irq, dev_id, fp); + return tick_handler(irq, dev_id); } /* @@ -190,7 +190,7 @@ static irqreturn_t bvme6000_timer_int (int irq, void *dev_id, struct pt_regs *fp * so divide by 8 to get the microsecond result. */ -void bvme6000_sched_init (irqreturn_t (*timer_routine)(int, void *, struct pt_regs *)) +void bvme6000_sched_init (irq_handler_t timer_routine) { volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; unsigned char msr = rtc->msr & 0xc0; diff --git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c index 7df05662b277..dd7c8a2583d3 100644 --- a/arch/m68k/hp300/time.c +++ b/arch/m68k/hp300/time.c @@ -36,15 +36,15 @@ #define INTVAL ((10000 / 4) - 1) -static irqreturn_t hp300_tick(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t hp300_tick(int irq, void *dev_id) { unsigned long tmp; - irqreturn_t (*vector)(int, void *, struct pt_regs *) = dev_id; + irq_handler_t vector = dev_id; in_8(CLOCKBASE + CLKSR); asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE)); /* Turn off the network and SCSI leds */ blinken_leds(0, 0xe0); - return vector(irq, NULL, regs); + return vector(irq, NULL); } unsigned long hp300_gettimeoffset(void) @@ -63,7 +63,7 @@ unsigned long hp300_gettimeoffset(void) return (USECS_PER_JIFFY * ticks) / INTVAL; } -void __init hp300_sched_init(irqreturn_t (*vector)(int, void *, struct pt_regs *)) +void __init hp300_sched_init(irq_handler_t vector) { out_8(CLOCKBASE + CLKCR2, 0x1); /* select CR1 */ out_8(CLOCKBASE + CLKCR1, 0x1); /* reset */ diff --git a/arch/m68k/hp300/time.h b/arch/m68k/hp300/time.h index 8ef9987b49ab..f5b3d098b0f5 100644 --- a/arch/m68k/hp300/time.h +++ b/arch/m68k/hp300/time.h @@ -1,4 +1,4 @@ -extern void hp300_sched_init(irqreturn_t (*vector)(int, void *, struct pt_regs *)); +extern void hp300_sched_init(irq_handler_t vector); extern unsigned long hp300_gettimeoffset (void); diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index dae609797dc0..1c9ecaa473d5 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -9,10 +9,11 @@ else endif extra-y += vmlinux.lds -obj-y := entry.o process.o traps.o ints.o dma.o signal.o ptrace.o \ +obj-y := entry.o process.o traps.o ints.o signal.o ptrace.o \ sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o obj-$(CONFIG_PCI) += bios32.o obj-$(CONFIG_MODULES) += module.o +obj-y$(CONFIG_MMU_SUN3) += dma.o # no, it's not a typo EXTRA_AFLAGS := -traditional diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index fc449f8b2045..9d4e4b5b6bd8 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -15,7 +15,7 @@ #include <asm/scatterlist.h> void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *handle, int flag) + dma_addr_t *handle, gfp_t flag) { struct page *page, **map; pgprot_t pgprot; @@ -51,7 +51,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S; else pgprot_val(pgprot) |= _PAGE_NOCACHE030; - addr = vmap(map, size, flag, pgprot); + addr = vmap(map, size, VM_MAP, pgprot); kfree(map); return addr; diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 9083c8b7659f..222ce4244564 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -205,7 +205,7 @@ ENTRY(auto_inthandler) movel %sp,%sp@- movel %d0,%sp@- | put vector # on stack auto_irqhandler_fixup = . + 2 - jsr m68k_handle_int | process the IRQ + jsr __m68k_handle_int | process the IRQ addql #8,%sp | pop parameters off stack ret_from_interrupt: @@ -239,7 +239,7 @@ user_irqvec_fixup = . + 2 movel %sp,%sp@- movel %d0,%sp@- | put vector # on stack user_irqhandler_fixup = . + 2 - jsr m68k_handle_int | process the IRQ + jsr __m68k_handle_int | process the IRQ addql #8,%sp | pop parameters off stack subqb #1,%curptr@(TASK_INFO+TINFO_PREEMPT+1) @@ -706,4 +706,33 @@ sys_call_table: .long sys_add_key .long sys_request_key /* 280 */ .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get + .long sys_inotify_init + .long sys_inotify_add_watch /* 285 */ + .long sys_inotify_rm_watch + .long sys_migrate_pages + .long sys_openat + .long sys_mkdirat + .long sys_mknodat /* 290 */ + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 + .long sys_unlinkat + .long sys_renameat /* 295 */ + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat + .long sys_fchmodat + .long sys_faccessat /* 300 */ + .long sys_ni_syscall /* Reserved for pselect6 */ + .long sys_ni_syscall /* Reserved for ppoll */ + .long sys_unshare + .long sys_set_robust_list + .long sys_get_robust_list /* 305 */ + .long sys_splice + .long sys_sync_file_range + .long sys_tee + .long sys_vmsplice + .long sys_move_pages /* 310 */ diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c index b33e37fb7b0e..84aceca6c05c 100644 --- a/arch/m68k/kernel/ints.c +++ b/arch/m68k/kernel/ints.c @@ -39,6 +39,7 @@ #include <asm/page.h> #include <asm/machdep.h> #include <asm/cacheflush.h> +#include <asm/irq_regs.h> #ifdef CONFIG_Q40 #include <asm/q40ints.h> @@ -104,7 +105,7 @@ void __init init_IRQ(void) * @handler: called from auto vector interrupts * * setup the handler to be called from auto vector interrupts instead of the - * standard m68k_handle_int(), it will be called with irq numbers in the range + * standard __m68k_handle_int(), it will be called with irq numbers in the range * from IRQ_AUTO_1 - IRQ_AUTO_7. */ void __init m68k_setup_auto_interrupt(void (*handler)(unsigned int, struct pt_regs *)) @@ -123,7 +124,7 @@ void __init m68k_setup_auto_interrupt(void (*handler)(unsigned int, struct pt_re * setup user vector interrupts, this includes activating the specified range * of interrupts, only then these interrupts can be requested (note: this is * different from auto vector interrupts). An optional handler can be installed - * to be called instead of the default m68k_handle_int(), it will be called + * to be called instead of the default __m68k_handle_int(), it will be called * with irq numbers starting from IRQ_USER. */ void __init m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt, @@ -131,6 +132,7 @@ void __init m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt, { int i; + BUG_ON(IRQ_USER + cnt >= NR_IRQS); m68k_first_user_vec = vec; for (i = 0; i < cnt; i++) irq_controller[IRQ_USER + i] = &user_irq_controller; @@ -215,7 +217,7 @@ int setup_irq(unsigned int irq, struct irq_node *node) } int request_irq(unsigned int irq, - irqreturn_t (*handler) (int, void *, struct pt_regs *), + irq_handler_t handler, unsigned long flags, const char *devname, void *dev_id) { struct irq_node *node; @@ -379,18 +381,25 @@ unsigned int irq_canonicalize(unsigned int irq) EXPORT_SYMBOL(irq_canonicalize); -asmlinkage void m68k_handle_int(unsigned int irq, struct pt_regs *regs) +asmlinkage void m68k_handle_int(unsigned int irq) { struct irq_node *node; - kstat_cpu(0).irqs[irq]++; node = irq_list[irq]; do { - node->handler(irq, node->dev_id, regs); + node->handler(irq, node->dev_id); node = node->next; } while (node); } +asmlinkage void __m68k_handle_int(unsigned int irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs; + old_regs = set_irq_regs(regs); + m68k_handle_int(irq); + set_irq_regs(old_regs); +} + asmlinkage void handle_badint(struct pt_regs *regs) { kstat_cpu(0).irqs[0]++; diff --git a/arch/m68k/kernel/m68k_ksyms.c b/arch/m68k/kernel/m68k_ksyms.c index aff26a52167c..6fc69c74fe2e 100644 --- a/arch/m68k/kernel/m68k_ksyms.c +++ b/arch/m68k/kernel/m68k_ksyms.c @@ -1,65 +1,10 @@ #include <linux/module.h> -#include <linux/linkage.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/in6.h> -#include <linux/interrupt.h> - -#include <asm/setup.h> -#include <asm/machdep.h> -#include <asm/pgalloc.h> -#include <asm/irq.h> -#include <asm/io.h> #include <asm/semaphore.h> -#include <asm/checksum.h> asmlinkage long long __ashldi3 (long long, int); asmlinkage long long __ashrdi3 (long long, int); asmlinkage long long __lshrdi3 (long long, int); asmlinkage long long __muldi3 (long long, long long); -extern char m68k_debug_device[]; - -/* platform dependent support */ - -EXPORT_SYMBOL(m68k_machtype); -EXPORT_SYMBOL(m68k_cputype); -EXPORT_SYMBOL(m68k_is040or060); -EXPORT_SYMBOL(m68k_realnum_memory); -EXPORT_SYMBOL(m68k_memory); -#ifndef CONFIG_SUN3 -EXPORT_SYMBOL(cache_push); -EXPORT_SYMBOL(cache_clear); -#ifndef CONFIG_SINGLE_MEMORY_CHUNK -EXPORT_SYMBOL(mm_vtop); -EXPORT_SYMBOL(mm_ptov); -EXPORT_SYMBOL(mm_end_of_chunk); -#else -EXPORT_SYMBOL(m68k_memoffset); -#endif /* !CONFIG_SINGLE_MEMORY_CHUNK */ -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(kernel_set_cachemode); -#endif /* !CONFIG_SUN3 */ -EXPORT_SYMBOL(m68k_debug_device); -EXPORT_SYMBOL(mach_hwclk); -EXPORT_SYMBOL(mach_get_ss); -EXPORT_SYMBOL(mach_get_rtc_pll); -EXPORT_SYMBOL(mach_set_rtc_pll); -#ifdef CONFIG_INPUT_M68K_BEEP_MODULE -EXPORT_SYMBOL(mach_beep); -#endif -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(strnlen); -EXPORT_SYMBOL(strrchr); -EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(kernel_thread); -#ifdef CONFIG_VME -EXPORT_SYMBOL(vme_brdtype); -#endif /* The following are special because they're not called explicitly (the C compiler generates them). Fortunately, diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 45a46646c1b3..99fc1226f7f8 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -187,6 +187,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) set_fs (fs); return pid; } +EXPORT_SYMBOL(kernel_thread); void flush_thread(void) { @@ -221,13 +222,13 @@ asmlinkage int m68k_clone(struct pt_regs *regs) { unsigned long clone_flags; unsigned long newsp; - int *parent_tidptr, *child_tidptr; + int __user *parent_tidptr, *child_tidptr; /* syscall2 puts clone_flags in d1 and usp in d2 */ clone_flags = regs->d1; newsp = regs->d2; - parent_tidptr = (int *)regs->d3; - child_tidptr = (int *)regs->d4; + parent_tidptr = (int __user *)regs->d3; + child_tidptr = (int __user *)regs->d4; if (!newsp) newsp = rdusp(); return do_fork(clone_flags, newsp, regs, 0, @@ -311,6 +312,7 @@ int dump_fpu (struct pt_regs *regs, struct user_m68kfp_struct *fpu) : "memory"); return 1; } +EXPORT_SYMBOL(dump_fpu); /* * fill in the user structure for a core dump.. @@ -357,11 +359,12 @@ void dump_thread(struct pt_regs * regs, struct user * dump) /* dump floating point stuff */ dump->u_fpvalid = dump_fpu (regs, &dump->m68kfp); } +EXPORT_SYMBOL(dump_thread); /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(char *name, char **argv, char **envp) +asmlinkage int sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp) { int error; char * filename; diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c index f2d7ee0ee18c..9af3ee0e555d 100644 --- a/arch/m68k/kernel/setup.c +++ b/arch/m68k/kernel/setup.c @@ -42,29 +42,39 @@ unsigned long m68k_machtype; unsigned long m68k_cputype; +EXPORT_SYMBOL(m68k_machtype); +EXPORT_SYMBOL(m68k_cputype); unsigned long m68k_fputype; unsigned long m68k_mmutype; #ifdef CONFIG_VME unsigned long vme_brdtype; +EXPORT_SYMBOL(vme_brdtype); #endif int m68k_is040or060; +EXPORT_SYMBOL(m68k_is040or060); extern int end; extern unsigned long availmem; int m68k_num_memory; int m68k_realnum_memory; +EXPORT_SYMBOL(m68k_realnum_memory); +#ifdef CONFIG_SINGLE_MEMORY_CHUNK unsigned long m68k_memoffset; +EXPORT_SYMBOL(m68k_memoffset); +#endif struct mem_info m68k_memory[NUM_MEMINFO]; +EXPORT_SYMBOL(m68k_memory); static struct mem_info m68k_ramdisk; static char m68k_command_line[CL_SIZE]; char m68k_debug_device[6] = ""; +EXPORT_SYMBOL(m68k_debug_device); -void (*mach_sched_init) (irqreturn_t (*handler)(int, void *, struct pt_regs *)) __initdata = NULL; +void (*mach_sched_init) (irq_handler_t handler) __initdata = NULL; /* machine dependent irq functions */ void (*mach_init_IRQ) (void) __initdata = NULL; void (*mach_get_model) (char *model); @@ -72,10 +82,14 @@ int (*mach_get_hardware_list) (char *buffer); /* machine dependent timer functions */ unsigned long (*mach_gettimeoffset) (void); int (*mach_hwclk) (int, struct rtc_time*); +EXPORT_SYMBOL(mach_hwclk); int (*mach_set_clock_mmss) (unsigned long); unsigned int (*mach_get_ss)(void); int (*mach_get_rtc_pll)(struct rtc_pll_info *); int (*mach_set_rtc_pll)(struct rtc_pll_info *); +EXPORT_SYMBOL(mach_get_ss); +EXPORT_SYMBOL(mach_get_rtc_pll); +EXPORT_SYMBOL(mach_set_rtc_pll); void (*mach_reset)( void ); void (*mach_halt)( void ); void (*mach_power_off)( void ); @@ -89,6 +103,7 @@ void (*mach_l2_flush) (int); #endif #if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE) void (*mach_beep)(unsigned int, unsigned int); +EXPORT_SYMBOL(mach_beep); #endif #if defined(CONFIG_ISA) && defined(MULTI_ISA) int isa_type; diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 28b2fefa4513..2a599c3ed787 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -21,6 +21,7 @@ #include <asm/machdep.h> #include <asm/io.h> +#include <asm/irq_regs.h> #include <linux/time.h> #include <linux/timex.h> @@ -37,13 +38,13 @@ static inline int set_rtc_mmss(unsigned long nowtime) * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs) +static irqreturn_t timer_interrupt(int irq, void *dummy) { do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); #ifdef CONFIG_HEARTBEAT /* use power LED as a heartbeat instead -- much more useful diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 4569406a2e1f..759fa244e6cd 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -326,13 +326,13 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba, switch (wbs & WBSIZ_040) { case BA_SIZE_BYTE: - res = put_user(wbd & 0xff, (char *)wba); + res = put_user(wbd & 0xff, (char __user *)wba); break; case BA_SIZE_WORD: - res = put_user(wbd & 0xffff, (short *)wba); + res = put_user(wbd & 0xffff, (short __user *)wba); break; case BA_SIZE_LONG: - res = put_user(wbd, (int *)wba); + res = put_user(wbd, (int __user *)wba); break; } diff --git a/arch/m68k/lib/string.c b/arch/m68k/lib/string.c index b92b89e1ea0c..891e1347bc4e 100644 --- a/arch/m68k/lib/string.c +++ b/arch/m68k/lib/string.c @@ -1,6 +1,19 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#define __IN_STRING_C -#include <linux/types.h> #include <linux/module.h> +#include <linux/string.h> + +char *strcpy(char *dest, const char *src) +{ + return __kernel_strcpy(dest, src); +} +EXPORT_SYMBOL(strcpy); void *memset(void *s, int c, size_t count) { diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c index 1bc188c0d983..865f9fb9e686 100644 --- a/arch/m68k/lib/uaccess.c +++ b/arch/m68k/lib/uaccess.c @@ -84,7 +84,7 @@ unsigned long __generic_copy_to_user(void __user *to, const void *from, " .even\n" "20: lsl.l #2,%0\n" "50: add.l %5,%0\n" - " jra 7b\n" + " jra 8b\n" " .previous\n" "\n" " .section __ex_table,\"a\"\n" diff --git a/arch/m68k/mac/baboon.c b/arch/m68k/mac/baboon.c index 6eaa881793d1..a1c7ec706741 100644 --- a/arch/m68k/mac/baboon.c +++ b/arch/m68k/mac/baboon.c @@ -25,7 +25,7 @@ int baboon_present,baboon_active; volatile struct baboon *baboon; -irqreturn_t baboon_irq(int, void *, struct pt_regs *); +irqreturn_t baboon_irq(int, void *); #if 0 extern int macide_ack_intr(struct ata_channel *); @@ -64,7 +64,7 @@ void __init baboon_register_interrupts(void) * Baboon interrupt handler. This works a lot like a VIA. */ -irqreturn_t baboon_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t baboon_irq(int irq, void *dev_id) { int irq_bit,i; unsigned char events; @@ -81,7 +81,7 @@ irqreturn_t baboon_irq(int irq, void *dev_id, struct pt_regs *regs) for (i = 0, irq_bit = 1 ; i < 3 ; i++, irq_bit <<= 1) { if (events & irq_bit/* & baboon_active*/) { baboon_active &= ~irq_bit; - m68k_handle_int(IRQ_BABOON_0 + i, regs); + m68k_handle_int(IRQ_BABOON_0 + i); baboon_active |= irq_bit; baboon->mb_ifr &= ~irq_bit; } diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 85dda1095b1f..562b38d00180 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -72,7 +72,7 @@ extern int show_mac_interrupts(struct seq_file *, void *); extern void iop_preinit(void); extern void iop_init(void); extern void via_init(void); -extern void via_init_clock(irqreturn_t (*func)(int, void *, struct pt_regs *)); +extern void via_init_clock(irq_handler_t func); extern void via_flush_cache(void); extern void oss_init(void); extern void psc_init(void); @@ -88,7 +88,7 @@ extern void mac_debugging_long(int, long); static void mac_get_model(char *str); -static void mac_sched_init(irqreturn_t (*vector)(int, void *, struct pt_regs *)) +static void mac_sched_init(irq_handler_t vector) { via_init_clock(vector); } diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index bc657b1057a7..0cea21f58192 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -132,7 +132,7 @@ static int iop_get_proc_info(char *, char **, off_t, int); struct listener { const char *devname; - void (*handler)(struct iop_msg *, struct pt_regs *); + void (*handler)(struct iop_msg *); }; /* @@ -152,7 +152,7 @@ static struct iop_msg iop_msg_pool[NUM_IOP_MSGS]; static struct iop_msg *iop_send_queue[NUM_IOPS][NUM_IOP_CHAN]; static struct listener iop_listeners[NUM_IOPS][NUM_IOP_CHAN]; -irqreturn_t iop_ism_irq(int, void *, struct pt_regs *); +irqreturn_t iop_ism_irq(int, void *); extern void oss_irq_enable(int); @@ -342,7 +342,7 @@ void __init iop_register_interrupts(void) */ int iop_listen(uint iop_num, uint chan, - void (*handler)(struct iop_msg *, struct pt_regs *), + void (*handler)(struct iop_msg *), const char *devname) { if ((iop_num >= NUM_IOPS) || !iop_base[iop_num]) return -EINVAL; @@ -407,7 +407,7 @@ static void iop_do_send(struct iop_msg *msg) * has gone into the IOP_MSG_COMPLETE state. */ -static void iop_handle_send(uint iop_num, uint chan, struct pt_regs *regs) +static void iop_handle_send(uint iop_num, uint chan) { volatile struct mac_iop *iop = iop_base[iop_num]; struct iop_msg *msg,*msg2; @@ -426,7 +426,7 @@ static void iop_handle_send(uint iop_num, uint chan, struct pt_regs *regs) for (i = 0 ; i < IOP_MSG_LEN ; i++, offset++) { msg->reply[i] = iop_readb(iop, offset); } - if (msg->handler) (*msg->handler)(msg, regs); + if (msg->handler) (*msg->handler)(msg); msg2 = msg; msg = msg->next; iop_free_msg(msg2); @@ -440,7 +440,7 @@ static void iop_handle_send(uint iop_num, uint chan, struct pt_regs *regs) * gone into the IOP_MSG_NEW state. */ -static void iop_handle_recv(uint iop_num, uint chan, struct pt_regs *regs) +static void iop_handle_recv(uint iop_num, uint chan) { volatile struct mac_iop *iop = iop_base[iop_num]; int i,offset; @@ -468,7 +468,7 @@ static void iop_handle_recv(uint iop_num, uint chan, struct pt_regs *regs) /* the message ourselves to avoid possible stalls. */ if (msg->handler) { - (*msg->handler)(msg, regs); + (*msg->handler)(msg); } else { #ifdef DEBUG_IOP printk("iop_handle_recv: unclaimed message on iop %d channel %d\n", iop_num, chan); @@ -492,7 +492,7 @@ static void iop_handle_recv(uint iop_num, uint chan, struct pt_regs *regs) int iop_send_message(uint iop_num, uint chan, void *privdata, uint msg_len, __u8 *msg_data, - void (*handler)(struct iop_msg *, struct pt_regs *)) + void (*handler)(struct iop_msg *)) { struct iop_msg *msg, *q; @@ -584,7 +584,7 @@ __u8 *iop_compare_code(uint iop_num, __u8 *code_start, * Handle an ISM IOP interrupt */ -irqreturn_t iop_ism_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t iop_ism_irq(int irq, void *dev_id) { uint iop_num = (uint) dev_id; volatile struct mac_iop *iop = iop_base[iop_num]; @@ -608,7 +608,7 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id, struct pt_regs *regs) printk(" %02X", state); #endif if (state == IOP_MSG_COMPLETE) { - iop_handle_send(iop_num, i, regs); + iop_handle_send(iop_num, i); } } #ifdef DEBUG_IOP @@ -628,7 +628,7 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id, struct pt_regs *regs) printk(" %02X", state); #endif if (state == IOP_MSG_NEW) { - iop_handle_recv(iop_num, i, regs); + iop_handle_recv(iop_num, i); } } #ifdef DEBUG_IOP diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c index 694b14bb0de1..f6fcd754d8f6 100644 --- a/arch/m68k/mac/macints.c +++ b/arch/m68k/mac/macints.c @@ -133,6 +133,7 @@ #include <asm/hwtest.h> #include <asm/errno.h> #include <asm/macints.h> +#include <asm/irq_regs.h> #define DEBUG_SPURIOUS #define SHUTUP_SONIC @@ -208,8 +209,8 @@ static void scc_irq_disable(unsigned int); * console_loglevel determines NMI handler function */ -irqreturn_t mac_nmi_handler(int, void *, struct pt_regs *); -irqreturn_t mac_debug_handler(int, void *, struct pt_regs *); +irqreturn_t mac_nmi_handler(int, void *); +irqreturn_t mac_debug_handler(int, void *); /* #define DEBUG_MACINTS */ @@ -393,7 +394,7 @@ int mac_irq_pending(unsigned int irq) static int num_debug[8]; -irqreturn_t mac_debug_handler(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t mac_debug_handler(int irq, void *dev_id) { if (num_debug[irq] < 10) { printk("DEBUG: Unexpected IRQ %d\n", irq); @@ -405,7 +406,7 @@ irqreturn_t mac_debug_handler(int irq, void *dev_id, struct pt_regs *regs) static int in_nmi; static volatile int nmi_hold; -irqreturn_t mac_nmi_handler(int irq, void *dev_id, struct pt_regs *fp) +irqreturn_t mac_nmi_handler(int irq, void *dev_id) { int i; /* @@ -432,6 +433,7 @@ irqreturn_t mac_nmi_handler(int irq, void *dev_id, struct pt_regs *fp) if (console_loglevel >= 8) { #if 0 + struct pt_regs *fp = get_irq_regs(); show_state(); printk("PC: %08lx\nSR: %04x SP: %p\n", fp->pc, fp->sr, fp); printk("d0: %08lx d1: %08lx d2: %08lx d3: %08lx\n", @@ -479,7 +481,7 @@ static void scc_irq_disable(unsigned int irq) * here is cleaner than hacking it into drivers/char/macserial.c. */ -void mac_scc_dispatch(int irq, void *dev_id, struct pt_regs *regs) +void mac_scc_dispatch(int irq, void *dev_id) { volatile unsigned char *scc = (unsigned char *) mac_bi_data.sccbase + 2; unsigned char reg; @@ -504,7 +506,7 @@ void mac_scc_dispatch(int irq, void *dev_id, struct pt_regs *regs) /* pretty much kill the system. */ if (reg & 0x38) - m68k_handle_int(IRQ_SCCA, regs); + m68k_handle_int(IRQ_SCCA); if (reg & 0x07) - m68k_handle_int(IRQ_SCCB, regs); + m68k_handle_int(IRQ_SCCB); } diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 63e04365191f..63690819565a 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -30,11 +30,11 @@ int oss_present; volatile struct mac_oss *oss; -irqreturn_t oss_irq(int, void *, struct pt_regs *); -irqreturn_t oss_nubus_irq(int, void *, struct pt_regs *); +irqreturn_t oss_irq(int, void *); +irqreturn_t oss_nubus_irq(int, void *); -extern irqreturn_t via1_irq(int, void *, struct pt_regs *); -extern irqreturn_t mac_scc_dispatch(int, void *, struct pt_regs *); +extern irqreturn_t via1_irq(int, void *); +extern irqreturn_t mac_scc_dispatch(int, void *); /* * Initialize the OSS @@ -92,7 +92,7 @@ void __init oss_nubus_init(void) * and SCSI; everything else is routed to its own autovector IRQ. */ -irqreturn_t oss_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t oss_irq(int irq, void *dev_id) { int events; @@ -113,7 +113,7 @@ irqreturn_t oss_irq(int irq, void *dev_id, struct pt_regs *regs) oss->irq_pending &= ~OSS_IP_SOUND; } else if (events & OSS_IP_SCSI) { oss->irq_level[OSS_SCSI] = OSS_IRQLEV_DISABLED; - m68k_handle_int(IRQ_MAC_SCSI, regs); + m68k_handle_int(IRQ_MAC_SCSI); oss->irq_pending &= ~OSS_IP_SCSI; oss->irq_level[OSS_SCSI] = OSS_IRQLEV_SCSI; } else { @@ -128,7 +128,7 @@ irqreturn_t oss_irq(int irq, void *dev_id, struct pt_regs *regs) * Unlike the VIA/RBV this is on its own autovector interrupt level. */ -irqreturn_t oss_nubus_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t oss_nubus_irq(int irq, void *dev_id) { int events, irq_bit, i; @@ -146,7 +146,7 @@ irqreturn_t oss_nubus_irq(int irq, void *dev_id, struct pt_regs *regs) for (i = 0, irq_bit = 1 ; i < 6 ; i++, irq_bit <<= 1) { if (events & irq_bit) { oss->irq_level[i] = OSS_IRQLEV_DISABLED; - m68k_handle_int(NUBUS_SOURCE_BASE + i, regs); + m68k_handle_int(NUBUS_SOURCE_BASE + i); oss->irq_pending &= ~irq_bit; oss->irq_level[i] = OSS_IRQLEV_NUBUS; } diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index e26218091755..15378a5878c9 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -30,7 +30,7 @@ int psc_present; volatile __u8 *psc; -irqreturn_t psc_irq(int, void *, struct pt_regs *); +irqreturn_t psc_irq(int, void *); /* * Debugging dump, used in various places to see what's going on. @@ -127,7 +127,7 @@ void __init psc_register_interrupts(void) * PSC interrupt handler. It's a lot like the VIA interrupt handler. */ -irqreturn_t psc_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t psc_irq(int irq, void *dev_id) { int pIFR = pIFRbase + ((int) dev_id); int pIER = pIERbase + ((int) dev_id); @@ -149,7 +149,7 @@ irqreturn_t psc_irq(int irq, void *dev_id, struct pt_regs *regs) for (i = 0, irq_bit = 1 ; i < 4 ; i++, irq_bit <<= 1) { if (events & irq_bit) { psc_write_byte(pIER, irq_bit); - m68k_handle_int(base_irq + i, regs); + m68k_handle_int(base_irq + i); psc_write_byte(pIFR, irq_bit); psc_write_byte(pIER, irq_bit | 0x80); } diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index c4aa345d544e..e27735be2924 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -63,14 +63,14 @@ static int gIER,gIFR,gBufA,gBufB; static int nubus_active; void via_debug_dump(void); -irqreturn_t via1_irq(int, void *, struct pt_regs *); -irqreturn_t via2_irq(int, void *, struct pt_regs *); -irqreturn_t via_nubus_irq(int, void *, struct pt_regs *); +irqreturn_t via1_irq(int, void *); +irqreturn_t via2_irq(int, void *); +irqreturn_t via_nubus_irq(int, void *); void via_irq_enable(int irq); void via_irq_disable(int irq); void via_irq_clear(int irq); -extern irqreturn_t mac_scc_dispatch(int, void *, struct pt_regs *); +extern irqreturn_t mac_scc_dispatch(int, void *); extern int oss_present; /* @@ -235,7 +235,7 @@ void __init via_init(void) * Start the 100 Hz clock */ -void __init via_init_clock(irqreturn_t (*func)(int, void *, struct pt_regs *)) +void __init via_init_clock(irq_handler_t func) { via1[vACR] |= 0x40; via1[vT1LL] = MAC_CLOCK_LOW; @@ -412,7 +412,7 @@ void __init via_nubus_init(void) * the machspec interrupt number after clearing the interrupt. */ -irqreturn_t via1_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t via1_irq(int irq, void *dev_id) { int irq_bit, i; unsigned char events, mask; @@ -424,7 +424,7 @@ irqreturn_t via1_irq(int irq, void *dev_id, struct pt_regs *regs) for (i = 0, irq_bit = 1 ; i < 7 ; i++, irq_bit <<= 1) if (events & irq_bit) { via1[vIER] = irq_bit; - m68k_handle_int(VIA1_SOURCE_BASE + i, regs); + m68k_handle_int(VIA1_SOURCE_BASE + i); via1[vIFR] = irq_bit; via1[vIER] = irq_bit | 0x80; } @@ -439,14 +439,14 @@ irqreturn_t via1_irq(int irq, void *dev_id, struct pt_regs *regs) /* No, it won't be set. that's why we're doing this. */ via_irq_disable(IRQ_MAC_NUBUS); via_irq_clear(IRQ_MAC_NUBUS); - m68k_handle_int(IRQ_MAC_NUBUS, regs); + m68k_handle_int(IRQ_MAC_NUBUS); via_irq_enable(IRQ_MAC_NUBUS); } #endif return IRQ_HANDLED; } -irqreturn_t via2_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t via2_irq(int irq, void *dev_id) { int irq_bit, i; unsigned char events, mask; @@ -459,7 +459,7 @@ irqreturn_t via2_irq(int irq, void *dev_id, struct pt_regs *regs) if (events & irq_bit) { via2[gIER] = irq_bit; via2[gIFR] = irq_bit | rbv_clear; - m68k_handle_int(VIA2_SOURCE_BASE + i, regs); + m68k_handle_int(VIA2_SOURCE_BASE + i); via2[gIER] = irq_bit | 0x80; } return IRQ_HANDLED; @@ -470,7 +470,7 @@ irqreturn_t via2_irq(int irq, void *dev_id, struct pt_regs *regs) * VIA2 dispatcher as a fast interrupt handler. */ -irqreturn_t via_nubus_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t via_nubus_irq(int irq, void *dev_id) { int irq_bit, i; unsigned char events; @@ -481,7 +481,7 @@ irqreturn_t via_nubus_irq(int irq, void *dev_id, struct pt_regs *regs) for (i = 0, irq_bit = 1 ; i < 7 ; i++, irq_bit <<= 1) { if (events & irq_bit) { via_irq_disable(NUBUS_SOURCE_BASE + i); - m68k_handle_int(NUBUS_SOURCE_BASE + i, regs); + m68k_handle_int(NUBUS_SOURCE_BASE + i); via_irq_enable(NUBUS_SOURCE_BASE + i); } } diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c index f46f049d29ff..b54ef1726c55 100644 --- a/arch/m68k/mm/kmap.c +++ b/arch/m68k/mm/kmap.c @@ -7,6 +7,7 @@ * used by other architectures /Roman Zippel */ +#include <linux/module.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/string.h> @@ -219,6 +220,7 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla return (void __iomem *)retaddr; } +EXPORT_SYMBOL(__ioremap); /* * Unmap a ioremap()ed region again @@ -234,6 +236,7 @@ void iounmap(void __iomem *addr) free_io_area((__force void *)addr); #endif } +EXPORT_SYMBOL(iounmap); /* * __iounmap unmaps nearly everything, so be careful @@ -360,3 +363,4 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode) flush_tlb_all(); } +EXPORT_SYMBOL(kernel_set_cachemode); diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c index a0c095e17222..0f88812822b1 100644 --- a/arch/m68k/mm/memory.c +++ b/arch/m68k/mm/memory.c @@ -4,6 +4,7 @@ * Copyright (C) 1995 Hamish Macdonald */ +#include <linux/module.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/string.h> @@ -157,9 +158,8 @@ unsigned long mm_vtop(unsigned long vaddr) return -1; } -#endif +EXPORT_SYMBOL(mm_vtop); -#ifndef CONFIG_SINGLE_MEMORY_CHUNK unsigned long mm_ptov (unsigned long paddr) { int i = 0; @@ -185,6 +185,7 @@ unsigned long mm_ptov (unsigned long paddr) #endif return -1; } +EXPORT_SYMBOL(mm_ptov); #endif /* invalidate page in both caches */ @@ -298,6 +299,7 @@ void cache_clear (unsigned long paddr, int len) mach_l2_flush(0); #endif } +EXPORT_SYMBOL(cache_clear); /* probably can be unexported */ /* @@ -350,6 +352,7 @@ void cache_push (unsigned long paddr, int len) mach_l2_flush(1); #endif } +EXPORT_SYMBOL(cache_push); /* probably can be unexported */ #ifndef CONFIG_SINGLE_MEMORY_CHUNK int mm_end_of_chunk (unsigned long addr, int len) @@ -361,4 +364,5 @@ int mm_end_of_chunk (unsigned long addr, int len) return 1; return 0; } +EXPORT_SYMBOL(mm_end_of_chunk); #endif diff --git a/arch/m68k/mm/sun3kmap.c b/arch/m68k/mm/sun3kmap.c index 7f0d86f3fe73..1af24cb5bfe1 100644 --- a/arch/m68k/mm/sun3kmap.c +++ b/arch/m68k/mm/sun3kmap.c @@ -8,6 +8,7 @@ * for more details. */ +#include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -59,7 +60,7 @@ static inline void do_pmeg_mapin(unsigned long phys, unsigned long virt, } } -void *sun3_ioremap(unsigned long phys, unsigned long size, +void __iomem *sun3_ioremap(unsigned long phys, unsigned long size, unsigned long type) { struct vm_struct *area; @@ -101,22 +102,24 @@ void *sun3_ioremap(unsigned long phys, unsigned long size, virt += seg_pages * PAGE_SIZE; } - return (void *)ret; + return (void __iomem *)ret; } -void *__ioremap(unsigned long phys, unsigned long size, int cache) +void __iomem *__ioremap(unsigned long phys, unsigned long size, int cache) { return sun3_ioremap(phys, size, SUN3_PAGE_TYPE_IO); } +EXPORT_SYMBOL(__ioremap); -void iounmap(void *addr) +void iounmap(void __iomem *addr) { vfree((void *)(PAGE_MASK & (unsigned long)addr)); } +EXPORT_SYMBOL(iounmap); /* sun3_map_test(addr, val) -- Reads a byte from addr, storing to val, * trapping the potential read fault. Returns 0 if the access faulted, diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 0cd0e5bddcee..4a7df9c3f85a 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -38,7 +38,7 @@ static void mvme147_get_model(char *model); static int mvme147_get_hardware_list(char *buffer); -extern void mvme147_sched_init(irqreturn_t (*handler)(int, void *, struct pt_regs *)); +extern void mvme147_sched_init(irq_handler_t handler); extern unsigned long mvme147_gettimeoffset (void); extern int mvme147_hwclk (int, struct rtc_time *); extern int mvme147_set_clock_mmss (unsigned long); @@ -51,7 +51,7 @@ static int bcd2int (unsigned char b); /* Save tick handler routine pointer, will point to do_timer() in * kernel/sched.c, called via mvme147_process_int() */ -irqreturn_t (*tick_handler)(int, void *, struct pt_regs *); +irq_handler_t tick_handler; int mvme147_parse_bootinfo(const struct bi_record *bi) @@ -114,15 +114,15 @@ void __init config_mvme147(void) /* Using pcc tick timer 1 */ -static irqreturn_t mvme147_timer_int (int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t mvme147_timer_int (int irq, void *dev_id) { m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; - return tick_handler(irq, dev_id, fp); + return tick_handler(irq, dev_id); } -void mvme147_sched_init (irqreturn_t (*timer_routine)(int, void *, struct pt_regs *)) +void mvme147_sched_init (irq_handler_t timer_routine) { tick_handler = timer_routine; request_irq (PCC_IRQ_TIMER1, mvme147_timer_int, diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index ce2727ed1bc0..c829ebb6b1af 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -42,7 +42,7 @@ static MK48T08ptr_t volatile rtc = (MK48T08ptr_t)MVME_RTC_BASE; static void mvme16x_get_model(char *model); static int mvme16x_get_hardware_list(char *buffer); -extern void mvme16x_sched_init(irqreturn_t (*handler)(int, void *, struct pt_regs *)); +extern void mvme16x_sched_init(irq_handler_t handler); extern unsigned long mvme16x_gettimeoffset (void); extern int mvme16x_hwclk (int, struct rtc_time *); extern int mvme16x_set_clock_mmss (unsigned long); @@ -54,7 +54,7 @@ int bcd2int (unsigned char b); /* Save tick handler routine pointer, will point to do_timer() in * kernel/sched.c, called via mvme16x_process_int() */ -static irqreturn_t (*tick_handler)(int, void *, struct pt_regs *); +static irq_handler_t tick_handler; unsigned short mvme16x_config; @@ -190,7 +190,7 @@ void __init config_mvme16x(void) } } -static irqreturn_t mvme16x_abort_int (int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) { p_bdid p = &mvme_bdid; unsigned long *new = (unsigned long *)vectors; @@ -218,13 +218,13 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id, struct pt_regs *fp) return IRQ_HANDLED; } -static irqreturn_t mvme16x_timer_int (int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) { *(volatile unsigned char *)0xfff4201b |= 8; - return tick_handler(irq, dev_id, fp); + return tick_handler(irq, dev_id); } -void mvme16x_sched_init (irqreturn_t (*timer_routine)(int, void *, struct pt_regs *)) +void mvme16x_sched_init (irq_handler_t timer_routine) { p_bdid p = &mvme_bdid; int irq; diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 9a1827876408..92f873cc7060 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -39,7 +39,7 @@ extern irqreturn_t q40_process_int (int level, struct pt_regs *regs); extern void q40_init_IRQ (void); static void q40_get_model(char *model); static int q40_get_hardware_list(char *buffer); -extern void q40_sched_init(irqreturn_t (*handler)(int, void *, struct pt_regs *)); +extern void q40_sched_init(irq_handler_t handler); extern unsigned long q40_gettimeoffset (void); extern int q40_hwclk (int, struct rtc_time *); diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c index 472f41c4158b..31cc07d8cec4 100644 --- a/arch/m68k/q40/q40ints.c +++ b/arch/m68k/q40/q40ints.c @@ -125,9 +125,9 @@ void q40_mksound(unsigned int hz, unsigned int ticks) sound_ticks = ticks << 1; } -static irqreturn_t (*q40_timer_routine)(int, void *, struct pt_regs *); +static irq_handler_t q40_timer_routine; -static irqreturn_t q40_timer_int (int irq, void * dev, struct pt_regs * regs) +static irqreturn_t q40_timer_int (int irq, void * dev) { ql_ticks = ql_ticks ? 0 : 1; if (sound_ticks) { @@ -138,11 +138,11 @@ static irqreturn_t q40_timer_int (int irq, void * dev, struct pt_regs * regs) } if (!ql_ticks) - q40_timer_routine(irq, dev, regs); + q40_timer_routine(irq, dev); return IRQ_HANDLED; } -void q40_sched_init (irqreturn_t (*timer_routine)(int, void *, struct pt_regs *)) +void q40_sched_init (irq_handler_t timer_routine) { int timer_irq; @@ -218,11 +218,11 @@ static void q40_irq_handler(unsigned int irq, struct pt_regs *fp) switch (irq) { case 4: case 6: - m68k_handle_int(Q40_IRQ_SAMPLE, fp); + __m68k_handle_int(Q40_IRQ_SAMPLE, fp); return; } if (mir & Q40_IRQ_FRAME_MASK) { - m68k_handle_int(Q40_IRQ_FRAME, fp); + __m68k_handle_int(Q40_IRQ_FRAME, fp); master_outb(-1, FRAME_CLEAR_REG); } if ((mir & Q40_IRQ_SER_MASK) || (mir & Q40_IRQ_EXT_MASK)) { @@ -257,7 +257,7 @@ static void q40_irq_handler(unsigned int irq, struct pt_regs *fp) goto iirq; } q40_state[irq] |= IRQ_INPROGRESS; - m68k_handle_int(irq, fp); + __m68k_handle_int(irq, fp); q40_state[irq] &= ~IRQ_INPROGRESS; /* naively enable everything, if that fails than */ @@ -288,7 +288,7 @@ static void q40_irq_handler(unsigned int irq, struct pt_regs *fp) mir = master_inb(IIRQ_REG); /* should test whether keyboard irq is really enabled, doing it in defhand */ if (mir & Q40_IRQ_KEYB_MASK) - m68k_handle_int(Q40_IRQ_KEYBOARD, fp); + __m68k_handle_int(Q40_IRQ_KEYBOARD, fp); return; } diff --git a/arch/m68k/sun3/Makefile b/arch/m68k/sun3/Makefile index 4d4f0695d985..be1a8470d636 100644 --- a/arch/m68k/sun3/Makefile +++ b/arch/m68k/sun3/Makefile @@ -2,6 +2,6 @@ # Makefile for Linux arch/m68k/sun3 source directory # -obj-y := sun3_ksyms.o sun3ints.o sun3dvma.o sbus.o idprom.o +obj-y := sun3ints.o sun3dvma.o sbus.o idprom.o obj-$(CONFIG_SUN3) += config.o mmu_emu.o leds.o dvma.o intersil.o diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index d09d03b3d956..4851b8437a87 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -35,7 +35,7 @@ extern char _text, _end; char sun3_reserved_pmeg[SUN3_PMEGS_NUM]; extern unsigned long sun3_gettimeoffset(void); -extern void sun3_sched_init(irqreturn_t (*handler)(int, void *, struct pt_regs *)); +extern void sun3_sched_init(irq_handler_t handler); extern void sun3_get_model (char* model); extern void idprom_init (void); extern int sun3_hwclk(int set, struct rtc_time *t); @@ -162,7 +162,7 @@ void __init config_sun3(void) sun3_bootmem_alloc(memory_start, memory_end); } -void __init sun3_sched_init(irqreturn_t (*timer_routine)(int, void *, struct pt_regs *)) +void __init sun3_sched_init(irq_handler_t timer_routine) { sun3_disable_interrupts(); intersil_clock->cmd_reg=(INTERSIL_RUN|INTERSIL_INT_DISABLE|INTERSIL_24H_MODE); diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c index 02c1fee6fe74..dca6ab6a4ede 100644 --- a/arch/m68k/sun3/idprom.c +++ b/arch/m68k/sun3/idprom.c @@ -6,6 +6,7 @@ * Sun3/3x models added by David Monro (davidm@psrg.cs.usyd.edu.au) */ +#include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/init.h> @@ -16,6 +17,8 @@ #include <asm/machines.h> /* Fun with Sun released architectures. */ struct idprom *idprom; +EXPORT_SYMBOL(idprom); + static struct idprom idprom_buffer; /* Here is the master table of Sun machines which use some implementation diff --git a/arch/m68k/sun3/sun3_ksyms.c b/arch/m68k/sun3/sun3_ksyms.c deleted file mode 100644 index 43e5a9af8abd..000000000000 --- a/arch/m68k/sun3/sun3_ksyms.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <linux/module.h> -#include <linux/types.h> -#include <asm/dvma.h> -#include <asm/idprom.h> - -/* - * Add things here when you find the need for it. - */ -EXPORT_SYMBOL(dvma_map_align); -EXPORT_SYMBOL(dvma_unmap); -EXPORT_SYMBOL(dvma_malloc_align); -EXPORT_SYMBOL(dvma_free); -EXPORT_SYMBOL(idprom); diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index a2bc2da7f8f0..8709677fa025 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -6,6 +6,7 @@ * Contains common routines for sun3/sun3x DVMA management. */ +#include <linux/module.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/list.h> @@ -312,6 +313,7 @@ inline unsigned long dvma_map_align(unsigned long kaddr, int len, int align) BUG(); return 0; } +EXPORT_SYMBOL(dvma_map_align); void dvma_unmap(void *baddr) { @@ -327,7 +329,7 @@ void dvma_unmap(void *baddr) return; } - +EXPORT_SYMBOL(dvma_unmap); void *dvma_malloc_align(unsigned long len, unsigned long align) { @@ -367,6 +369,7 @@ void *dvma_malloc_align(unsigned long len, unsigned long align) return (void *)vaddr; } +EXPORT_SYMBOL(dvma_malloc_align); void dvma_free(void *vaddr) { @@ -374,3 +377,4 @@ void dvma_free(void *vaddr) return; } +EXPORT_SYMBOL(dvma_free); diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index dc4ea7e074a6..baf74e8de8b5 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -15,6 +15,7 @@ #include <asm/intersil.h> #include <asm/oplib.h> #include <asm/sun3ints.h> +#include <asm/irq_regs.h> #include <linux/seq_file.h> extern void sun3_leds (unsigned char); @@ -48,7 +49,7 @@ void sun3_disable_irq(unsigned int irq) *sun3_intreg &= ~(1 << irq); } -static irqreturn_t sun3_int7(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t sun3_int7(int irq, void *dev_id) { *sun3_intreg |= (1 << irq); if (!(kstat_cpu(0).irqs[irq] % 2000)) @@ -56,7 +57,7 @@ static irqreturn_t sun3_int7(int irq, void *dev_id, struct pt_regs *fp) return IRQ_HANDLED; } -static irqreturn_t sun3_int5(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t sun3_int5(int irq, void *dev_id) { #ifdef CONFIG_SUN3 intersil_clear(); @@ -67,14 +68,14 @@ static irqreturn_t sun3_int5(int irq, void *dev_id, struct pt_regs *fp) #endif do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(fp)); + update_process_times(user_mode(get_irq_regs())); #endif if (!(kstat_cpu(0).irqs[irq] % 20)) sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]); return IRQ_HANDLED; } -static irqreturn_t sun3_vec255(int irq, void *dev_id, struct pt_regs *fp) +static irqreturn_t sun3_vec255(int irq, void *dev_id) { // intersil_clear(); return IRQ_HANDLED; @@ -84,7 +85,7 @@ static void sun3_inthandle(unsigned int irq, struct pt_regs *fp) { *sun3_intreg &= ~(1 << irq); - m68k_handle_int(irq, fp); + __m68k_handle_int(irq, fp); } static struct irq_controller sun3_irq_controller = { diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c index 6f4204fbecd7..f5eaafb00d21 100644 --- a/arch/m68k/sun3x/time.c +++ b/arch/m68k/sun3x/time.c @@ -90,7 +90,7 @@ static void sun3x_timer_tick(int irq, void *dev_id, struct pt_regs *regs) } #endif -void __init sun3x_sched_init(irqreturn_t (*vector)(int, void *, struct pt_regs *)) +void __init sun3x_sched_init(irq_handler_t vector) { sun3_disable_interrupts(); diff --git a/arch/m68k/sun3x/time.h b/arch/m68k/sun3x/time.h index e7e43b4ec4a1..6909e1297534 100644 --- a/arch/m68k/sun3x/time.h +++ b/arch/m68k/sun3x/time.h @@ -3,7 +3,7 @@ extern int sun3x_hwclk(int set, struct rtc_time *t); unsigned long sun3x_gettimeoffset (void); -void sun3x_sched_init(irqreturn_t (*vector)(int, void *, struct pt_regs *)); +void sun3x_sched_init(irq_handler_t vector); struct mostek_dt { volatile unsigned char csr; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8a49884bd5ec..14af6cce2fa2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1010,11 +1010,6 @@ endchoice config ARC32 bool -config AU1X00_USB_DEVICE - bool - depends on MIPS_PB1500 || MIPS_PB1100 || MIPS_PB1000 - default n - config BOOT_ELF32 bool @@ -1465,10 +1460,8 @@ config MIPS_MT_DISABLED the option of an MT-enabled processor this option will be the only option in this menu. -config MIPS_MT_SMTC - bool "SMTC: Use all TCs on all VPEs for SMP" - depends on CPU_MIPS32_R2 - #depends on CPU_MIPS64_R2 # once there is hardware ... +config MIPS_MT_SMP + bool "Use 1 TC on each available VPE for SMP" depends on SYS_SUPPORTS_MULTITHREADING select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_SRS @@ -1476,11 +1469,13 @@ config MIPS_MT_SMTC select SMP select SYS_SUPPORTS_SMP help - This is a kernel model which is known a SMTC or lately has been - marketesed into SMVP. + This is a kernel model which is also known a VSMP or lately + has been marketesed into SMVP. -config MIPS_MT_SMP - bool "Use 1 TC on each available VPE for SMP" +config MIPS_MT_SMTC + bool "SMTC: Use all TCs on all VPEs for SMP" + depends on CPU_MIPS32_R2 + #depends on CPU_MIPS64_R2 # once there is hardware ... depends on SYS_SUPPORTS_MULTITHREADING select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_SRS @@ -1488,8 +1483,8 @@ config MIPS_MT_SMP select SMP select SYS_SUPPORTS_SMP help - This is a kernel model which is also known a VSMP or lately - has been marketesed into SMVP. + This is a kernel model which is known a SMTC or lately has been + marketesed into SMVP. config MIPS_VPE_LOADER bool "VPE loader support." diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 2124350ab94d..641aa30b3638 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -91,8 +91,17 @@ cflags-y += -ffreestanding # carefully avoid to add it redundantly because gcc 3.3/3.4 complains # when fed the toolchain default! # -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB -D__MIPSEB__) -cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL -D__MIPSEL__) +# Certain gcc versions upto gcc 4.1.1 (probably 4.2-subversion as of +# 2006-10-10 don't properly change the the predefined symbols if -EB / -EL +# are used, so we kludge that here. A bug has been filed at +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29413. +# +undef-all += -UMIPSEB -U_MIPSEB -U__MIPSEB -U__MIPSEB__ +undef-all += -UMIPSEL -U_MIPSEL -U__MIPSEL -U__MIPSEL__ +predef-be += -DMIPSEB -D_MIPSEB -D__MIPSEB -D__MIPSEB__ +predef-le += -DMIPSEL -D_MIPSEL -D__MIPSEL -D__MIPSEL__ +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB $(undef-all) $(predef-be)) +cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le)) cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \ -fno-omit-frame-pointer diff --git a/arch/mips/au1000/common/Makefile b/arch/mips/au1000/common/Makefile index bf682f50b859..4c35525edb4f 100644 --- a/arch/mips/au1000/common/Makefile +++ b/arch/mips/au1000/common/Makefile @@ -10,6 +10,5 @@ obj-y += prom.o irq.o puts.o time.o reset.o \ au1xxx_irqmap.o clocks.o platform.o power.o setup.o \ sleeper.o cputable.o dma.o dbdma.o gpio.o -obj-$(CONFIG_AU1X00_USB_DEVICE) += usbdev.o obj-$(CONFIG_KGDB) += dbg_io.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/mips/au1000/common/dma.c b/arch/mips/au1000/common/dma.c index fb7c47c1585d..c78260d4e837 100644 --- a/arch/mips/au1000/common/dma.c +++ b/arch/mips/au1000/common/dma.c @@ -160,7 +160,7 @@ void dump_au1000_dma_channel(unsigned int dmanr) * Requests the DMA done IRQ if irqhandler != NULL. */ int request_au1000_dma(int dev_id, const char *dev_str, - irqreturn_t (*irqhandler)(int, void *, struct pt_regs *), + irq_handler_t irqhandler, unsigned long irqflags, void *irq_dev_id) { diff --git a/arch/mips/au1000/common/irq.c b/arch/mips/au1000/common/irq.c index 316722ee8cf5..2abe132bb07d 100644 --- a/arch/mips/au1000/common/irq.c +++ b/arch/mips/au1000/common/irq.c @@ -67,7 +67,7 @@ extern void set_debug_traps(void); extern irq_cpustat_t irq_stat [NR_CPUS]; -extern void mips_timer_interrupt(struct pt_regs *regs); +extern void mips_timer_interrupt(void); static void setup_local_irq(unsigned int irq, int type, int int_req); static unsigned int startup_irq(unsigned int irq); @@ -81,10 +81,6 @@ inline void local_disable_irq(unsigned int irq_nr); void (*board_init_irq)(void); -#ifdef CONFIG_PM -extern irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs); -#endif - static DEFINE_SPINLOCK(irq_lock); @@ -292,7 +288,7 @@ static struct irq_chip level_irq_type = { }; #ifdef CONFIG_PM -void startup_match20_interrupt(irqreturn_t (*handler)(int, void *, struct pt_regs *)) +void startup_match20_interrupt(irq_handler_t handler) { struct irq_desc *desc = &irq_desc[AU1000_TOY_MATCH2_INT]; @@ -501,14 +497,15 @@ void __init arch_init_irq(void) * intcX_reqX_irqdispatch(). */ -void intc0_req0_irqdispatch(struct pt_regs *regs) +static void intc0_req0_irqdispatch(void) { int irq = 0; static unsigned long intc0_req0 = 0; intc0_req0 |= au_readl(IC0_REQ0INT); - if (!intc0_req0) return; + if (!intc0_req0) + return; #ifdef AU1000_USB_DEV_REQ_INT /* * Because of the tight timing of SETUP token to reply @@ -517,28 +514,29 @@ void intc0_req0_irqdispatch(struct pt_regs *regs) */ if ((intc0_req0 & (1<<AU1000_USB_DEV_REQ_INT))) { intc0_req0 &= ~(1<<AU1000_USB_DEV_REQ_INT); - do_IRQ(AU1000_USB_DEV_REQ_INT, regs); + do_IRQ(AU1000_USB_DEV_REQ_INT); return; } #endif irq = au_ffs(intc0_req0) - 1; intc0_req0 &= ~(1<<irq); - do_IRQ(irq, regs); + do_IRQ(irq); } -void intc0_req1_irqdispatch(struct pt_regs *regs) +static void intc0_req1_irqdispatch(void) { int irq = 0; static unsigned long intc0_req1 = 0; intc0_req1 |= au_readl(IC0_REQ1INT); - if (!intc0_req1) return; + if (!intc0_req1) + return; irq = au_ffs(intc0_req1) - 1; intc0_req1 &= ~(1<<irq); - do_IRQ(irq, regs); + do_IRQ(irq); } @@ -546,35 +544,37 @@ void intc0_req1_irqdispatch(struct pt_regs *regs) * Interrupt Controller 1: * interrupts 32 - 63 */ -void intc1_req0_irqdispatch(struct pt_regs *regs) +static void intc1_req0_irqdispatch(void) { int irq = 0; static unsigned long intc1_req0 = 0; intc1_req0 |= au_readl(IC1_REQ0INT); - if (!intc1_req0) return; + if (!intc1_req0) + return; irq = au_ffs(intc1_req0) - 1; intc1_req0 &= ~(1<<irq); irq += 32; - do_IRQ(irq, regs); + do_IRQ(irq); } -void intc1_req1_irqdispatch(struct pt_regs *regs) +static void intc1_req1_irqdispatch(void) { int irq = 0; static unsigned long intc1_req1 = 0; intc1_req1 |= au_readl(IC1_REQ1INT); - if (!intc1_req1) return; + if (!intc1_req1) + return; irq = au_ffs(intc1_req1) - 1; intc1_req1 &= ~(1<<irq); irq += 32; - do_IRQ(irq, regs); + do_IRQ(irq); } #ifdef CONFIG_PM @@ -660,20 +660,20 @@ restore_au1xxx_intctl(void) } #endif /* CONFIG_PM */ -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; if (pending & CAUSEF_IP7) - mips_timer_interrupt(regs); + mips_timer_interrupt(); else if (pending & CAUSEF_IP2) - intc0_req0_irqdispatch(regs); + intc0_req0_irqdispatch(); else if (pending & CAUSEF_IP3) - intc0_req1_irqdispatch(regs); + intc0_req1_irqdispatch(); else if (pending & CAUSEF_IP4) - intc1_req0_irqdispatch(regs); + intc1_req0_irqdispatch(); else if (pending & CAUSEF_IP5) - intc1_req1_irqdispatch(regs); + intc1_req1_irqdispatch(); else - spurious_interrupt(regs); + spurious_interrupt(); } diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c index 0a067f3113a5..94f09194d63d 100644 --- a/arch/mips/au1000/common/time.c +++ b/arch/mips/au1000/common/time.c @@ -41,7 +41,6 @@ #include <asm/compiler.h> #include <asm/mipsregs.h> -#include <asm/ptrace.h> #include <asm/time.h> #include <asm/div64.h> #include <asm/mach-au1x00/au1000.h> @@ -62,7 +61,7 @@ static unsigned int timerhi = 0, timerlo = 0; #error "unsupported HZ value! Must be in [100,1000]" #endif #define MATCH20_INC (328*100/HZ) /* magic number 328 is for HZ=100... */ -extern void startup_match20_interrupt(irqreturn_t (*handler)(int, void *, struct pt_regs *)); +extern void startup_match20_interrupt(irq_handler_t handler); static unsigned long last_pc0, last_match20; #endif @@ -79,7 +78,8 @@ static inline void ack_r4ktimer(unsigned long newval) * is provably more robust. */ unsigned long wtimer; -void mips_timer_interrupt(struct pt_regs *regs) + +void mips_timer_interrupt(void) { int irq = 63; unsigned long count; @@ -98,7 +98,7 @@ void mips_timer_interrupt(struct pt_regs *regs) kstat_this_cpu.irqs[irq]++; do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif r4k_cur += r4k_offset; ack_r4ktimer(r4k_cur); @@ -115,7 +115,7 @@ null: } #ifdef CONFIG_PM -irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t counter0_irq(int irq, void *dev_id) { unsigned long pc0; int time_elapsed; @@ -139,7 +139,7 @@ irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs) while (time_elapsed > 0) { do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif time_elapsed -= MATCH20_INC; last_match20 += MATCH20_INC; @@ -158,7 +158,7 @@ irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs) jiffie_drift -= 999; do_timer(1); /* increment jiffies by one */ #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif } diff --git a/arch/mips/au1000/common/usbdev.c b/arch/mips/au1000/common/usbdev.c deleted file mode 100644 index 63bcb3a95dc7..000000000000 --- a/arch/mips/au1000/common/usbdev.c +++ /dev/null @@ -1,1555 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * Au1000 USB Device-Side (device layer) - * - * Copyright 2001-2002 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * stevel@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include <linux/kernel.h> -#include <linux/ioport.h> -#include <linux/sched.h> -#include <linux/signal.h> -#include <linux/errno.h> -#include <linux/poll.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/fcntl.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/list.h> -#include <linux/smp_lock.h> -#define DEBUG -#include <linux/usb.h> - -#include <asm/io.h> -#include <asm/uaccess.h> -#include <asm/irq.h> -#include <asm/mipsregs.h> -#include <asm/au1000.h> -#include <asm/au1000_dma.h> -#include <asm/au1000_usbdev.h> - -#ifdef DEBUG -#undef VDEBUG -#ifdef VDEBUG -#define vdbg(fmt, arg...) printk(KERN_DEBUG __FILE__ ": " fmt "\n" , ## arg) -#else -#define vdbg(fmt, arg...) do {} while (0) -#endif -#else -#define vdbg(fmt, arg...) do {} while (0) -#endif - -#define ALLOC_FLAGS (in_interrupt () ? GFP_ATOMIC : GFP_KERNEL) - -#define EP_FIFO_DEPTH 8 - -typedef enum { - SETUP_STAGE = 0, - DATA_STAGE, - STATUS_STAGE -} ep0_stage_t; - -typedef struct { - int read_fifo; - int write_fifo; - int ctrl_stat; - int read_fifo_status; - int write_fifo_status; -} endpoint_reg_t; - -typedef struct { - usbdev_pkt_t *head; - usbdev_pkt_t *tail; - int count; -} pkt_list_t; - -typedef struct { - int active; - struct usb_endpoint_descriptor *desc; - endpoint_reg_t *reg; - /* Only one of these are used, unless this is the control ep */ - pkt_list_t inlist; - pkt_list_t outlist; - unsigned int indma, outdma; /* DMA channel numbers for IN, OUT */ - /* following are extracted from endpoint descriptor for easy access */ - int max_pkt_size; - int type; - int direction; - /* WE assign endpoint addresses! */ - int address; - spinlock_t lock; -} endpoint_t; - - -static struct usb_dev { - endpoint_t ep[6]; - ep0_stage_t ep0_stage; - - struct usb_device_descriptor * dev_desc; - struct usb_interface_descriptor* if_desc; - struct usb_config_descriptor * conf_desc; - u8 * full_conf_desc; - struct usb_string_descriptor * str_desc[6]; - - /* callback to function layer */ - void (*func_cb)(usbdev_cb_type_t type, unsigned long arg, - void *cb_data); - void* cb_data; - - usbdev_state_t state; // device state - int suspended; // suspended flag - int address; // device address - int interface; - int num_ep; - u8 alternate_setting; - u8 configuration; // configuration value - int remote_wakeup_en; -} usbdev; - - -static endpoint_reg_t ep_reg[] = { - // FIFO's 0 and 1 are EP0 default control - {USBD_EP0RD, USBD_EP0WR, USBD_EP0CS, USBD_EP0RDSTAT, USBD_EP0WRSTAT }, - {0}, - // FIFO 2 is EP2, IN - { -1, USBD_EP2WR, USBD_EP2CS, -1, USBD_EP2WRSTAT }, - // FIFO 3 is EP3, IN - { -1, USBD_EP3WR, USBD_EP3CS, -1, USBD_EP3WRSTAT }, - // FIFO 4 is EP4, OUT - {USBD_EP4RD, -1, USBD_EP4CS, USBD_EP4RDSTAT, -1 }, - // FIFO 5 is EP5, OUT - {USBD_EP5RD, -1, USBD_EP5CS, USBD_EP5RDSTAT, -1 } -}; - -static struct { - unsigned int id; - const char *str; -} ep_dma_id[] = { - { DMA_ID_USBDEV_EP0_TX, "USBDev EP0 IN" }, - { DMA_ID_USBDEV_EP0_RX, "USBDev EP0 OUT" }, - { DMA_ID_USBDEV_EP2_TX, "USBDev EP2 IN" }, - { DMA_ID_USBDEV_EP3_TX, "USBDev EP3 IN" }, - { DMA_ID_USBDEV_EP4_RX, "USBDev EP4 OUT" }, - { DMA_ID_USBDEV_EP5_RX, "USBDev EP5 OUT" } -}; - -#define DIR_OUT 0 -#define DIR_IN (1<<3) - -#define CONTROL_EP USB_ENDPOINT_XFER_CONTROL -#define BULK_EP USB_ENDPOINT_XFER_BULK - -static inline endpoint_t * -epaddr_to_ep(struct usb_dev* dev, int ep_addr) -{ - if (ep_addr >= 0 && ep_addr < 2) - return &dev->ep[0]; - if (ep_addr < 6) - return &dev->ep[ep_addr]; - return NULL; -} - -static const char* std_req_name[] = { - "GET_STATUS", - "CLEAR_FEATURE", - "RESERVED", - "SET_FEATURE", - "RESERVED", - "SET_ADDRESS", - "GET_DESCRIPTOR", - "SET_DESCRIPTOR", - "GET_CONFIGURATION", - "SET_CONFIGURATION", - "GET_INTERFACE", - "SET_INTERFACE", - "SYNCH_FRAME" -}; - -static inline const char* -get_std_req_name(int req) -{ - return (req >= 0 && req <= 12) ? std_req_name[req] : "UNKNOWN"; -} - -#if 0 -static void -dump_setup(struct usb_ctrlrequest* s) -{ - dbg("%s: requesttype=%d", __FUNCTION__, s->requesttype); - dbg("%s: request=%d %s", __FUNCTION__, s->request, - get_std_req_name(s->request)); - dbg("%s: value=0x%04x", __FUNCTION__, s->wValue); - dbg("%s: index=%d", __FUNCTION__, s->index); - dbg("%s: length=%d", __FUNCTION__, s->length); -} -#endif - -static inline usbdev_pkt_t * -alloc_packet(endpoint_t * ep, int data_size, void* data) -{ - usbdev_pkt_t* pkt = kmalloc(sizeof(usbdev_pkt_t) + data_size, - ALLOC_FLAGS); - if (!pkt) - return NULL; - pkt->ep_addr = ep->address; - pkt->size = data_size; - pkt->status = 0; - pkt->next = NULL; - if (data) - memcpy(pkt->payload, data, data_size); - - return pkt; -} - - -/* - * Link a packet to the tail of the enpoint's packet list. - * EP spinlock must be held when calling. - */ -static void -link_tail(endpoint_t * ep, pkt_list_t * list, usbdev_pkt_t * pkt) -{ - if (!list->tail) { - list->head = list->tail = pkt; - list->count = 1; - } else { - list->tail->next = pkt; - list->tail = pkt; - list->count++; - } -} - -/* - * Unlink and return a packet from the head of the given packet - * list. It is the responsibility of the caller to free the packet. - * EP spinlock must be held when calling. - */ -static usbdev_pkt_t * -unlink_head(pkt_list_t * list) -{ - usbdev_pkt_t *pkt; - - pkt = list->head; - if (!pkt || !list->count) { - return NULL; - } - - list->head = pkt->next; - if (!list->head) { - list->head = list->tail = NULL; - list->count = 0; - } else - list->count--; - - return pkt; -} - -/* - * Create and attach a new packet to the tail of the enpoint's - * packet list. EP spinlock must be held when calling. - */ -static usbdev_pkt_t * -add_packet(endpoint_t * ep, pkt_list_t * list, int size) -{ - usbdev_pkt_t *pkt = alloc_packet(ep, size, NULL); - if (!pkt) - return NULL; - - link_tail(ep, list, pkt); - return pkt; -} - - -/* - * Unlink and free a packet from the head of the enpoint's - * packet list. EP spinlock must be held when calling. - */ -static inline void -free_packet(pkt_list_t * list) -{ - kfree(unlink_head(list)); -} - -/* EP spinlock must be held when calling. */ -static inline void -flush_pkt_list(pkt_list_t * list) -{ - while (list->count) - free_packet(list); -} - -/* EP spinlock must be held when calling */ -static inline void -flush_write_fifo(endpoint_t * ep) -{ - if (ep->reg->write_fifo_status >= 0) { - au_writel(USBDEV_FSTAT_FLUSH | USBDEV_FSTAT_UF | - USBDEV_FSTAT_OF, - ep->reg->write_fifo_status); - //udelay(100); - //au_writel(USBDEV_FSTAT_UF | USBDEV_FSTAT_OF, - // ep->reg->write_fifo_status); - } -} - -/* EP spinlock must be held when calling */ -static inline void -flush_read_fifo(endpoint_t * ep) -{ - if (ep->reg->read_fifo_status >= 0) { - au_writel(USBDEV_FSTAT_FLUSH | USBDEV_FSTAT_UF | - USBDEV_FSTAT_OF, - ep->reg->read_fifo_status); - //udelay(100); - //au_writel(USBDEV_FSTAT_UF | USBDEV_FSTAT_OF, - // ep->reg->read_fifo_status); - } -} - - -/* EP spinlock must be held when calling. */ -static void -endpoint_flush(endpoint_t * ep) -{ - // First, flush all packets - flush_pkt_list(&ep->inlist); - flush_pkt_list(&ep->outlist); - - // Now flush the endpoint's h/w FIFO(s) - flush_write_fifo(ep); - flush_read_fifo(ep); -} - -/* EP spinlock must be held when calling. */ -static void -endpoint_stall(endpoint_t * ep) -{ - u32 cs; - - warn("%s", __FUNCTION__); - - cs = au_readl(ep->reg->ctrl_stat) | USBDEV_CS_STALL; - au_writel(cs, ep->reg->ctrl_stat); -} - -/* EP spinlock must be held when calling. */ -static void -endpoint_unstall(endpoint_t * ep) -{ - u32 cs; - - warn("%s", __FUNCTION__); - - cs = au_readl(ep->reg->ctrl_stat) & ~USBDEV_CS_STALL; - au_writel(cs, ep->reg->ctrl_stat); -} - -static void -endpoint_reset_datatoggle(endpoint_t * ep) -{ - // FIXME: is this possible? -} - - -/* EP spinlock must be held when calling. */ -static int -endpoint_fifo_read(endpoint_t * ep) -{ - int read_count = 0; - u8 *bufptr; - usbdev_pkt_t *pkt = ep->outlist.tail; - - if (!pkt) - return -EINVAL; - - bufptr = &pkt->payload[pkt->size]; - while (au_readl(ep->reg->read_fifo_status) & USBDEV_FSTAT_FCNT_MASK) { - *bufptr++ = au_readl(ep->reg->read_fifo) & 0xff; - read_count++; - pkt->size++; - } - - return read_count; -} - -#if 0 -/* EP spinlock must be held when calling. */ -static int -endpoint_fifo_write(endpoint_t * ep, int index) -{ - int write_count = 0; - u8 *bufptr; - usbdev_pkt_t *pkt = ep->inlist.head; - - if (!pkt) - return -EINVAL; - - bufptr = &pkt->payload[index]; - while ((au_readl(ep->reg->write_fifo_status) & - USBDEV_FSTAT_FCNT_MASK) < EP_FIFO_DEPTH) { - if (bufptr < pkt->payload + pkt->size) { - au_writel(*bufptr++, ep->reg->write_fifo); - write_count++; - } else { - break; - } - } - - return write_count; -} -#endif - -/* - * This routine is called to restart transmission of a packet. - * The endpoint's TSIZE must be set to the new packet's size, - * and DMA to the write FIFO needs to be restarted. - * EP spinlock must be held when calling. - */ -static void -kickstart_send_packet(endpoint_t * ep) -{ - u32 cs; - usbdev_pkt_t *pkt = ep->inlist.head; - - vdbg("%s: ep%d, pkt=%p", __FUNCTION__, ep->address, pkt); - - if (!pkt) { - err("%s: head=NULL! list->count=%d", __FUNCTION__, - ep->inlist.count); - return; - } - - dma_cache_wback_inv((unsigned long)pkt->payload, pkt->size); - - /* - * make sure FIFO is empty - */ - flush_write_fifo(ep); - - cs = au_readl(ep->reg->ctrl_stat) & USBDEV_CS_STALL; - cs |= (pkt->size << USBDEV_CS_TSIZE_BIT); - au_writel(cs, ep->reg->ctrl_stat); - - if (get_dma_active_buffer(ep->indma) == 1) { - set_dma_count1(ep->indma, pkt->size); - set_dma_addr1(ep->indma, virt_to_phys(pkt->payload)); - enable_dma_buffer1(ep->indma); // reenable - } else { - set_dma_count0(ep->indma, pkt->size); - set_dma_addr0(ep->indma, virt_to_phys(pkt->payload)); - enable_dma_buffer0(ep->indma); // reenable - } - if (dma_halted(ep->indma)) - start_dma(ep->indma); -} - - -/* - * This routine is called when a packet in the inlist has been - * completed. Frees the completed packet and starts sending the - * next. EP spinlock must be held when calling. - */ -static usbdev_pkt_t * -send_packet_complete(endpoint_t * ep) -{ - usbdev_pkt_t *pkt = unlink_head(&ep->inlist); - - if (pkt) { - pkt->status = - (au_readl(ep->reg->ctrl_stat) & USBDEV_CS_NAK) ? - PKT_STATUS_NAK : PKT_STATUS_ACK; - - vdbg("%s: ep%d, %s pkt=%p, list count=%d", __FUNCTION__, - ep->address, (pkt->status & PKT_STATUS_NAK) ? - "NAK" : "ACK", pkt, ep->inlist.count); - } - - /* - * The write fifo should already be drained if things are - * working right, but flush it anyway just in case. - */ - flush_write_fifo(ep); - - // begin transmitting next packet in the inlist - if (ep->inlist.count) { - kickstart_send_packet(ep); - } - - return pkt; -} - -/* - * Add a new packet to the tail of the given ep's packet - * inlist. The transmit complete interrupt frees packets from - * the head of this list. EP spinlock must be held when calling. - */ -static int -send_packet(struct usb_dev* dev, usbdev_pkt_t *pkt, int async) -{ - pkt_list_t *list; - endpoint_t* ep; - - if (!pkt || !(ep = epaddr_to_ep(dev, pkt->ep_addr))) - return -EINVAL; - - if (!pkt->size) - return 0; - - list = &ep->inlist; - - if (!async && list->count) { - halt_dma(ep->indma); - flush_pkt_list(list); - } - - link_tail(ep, list, pkt); - - vdbg("%s: ep%d, pkt=%p, size=%d, list count=%d", __FUNCTION__, - ep->address, pkt, pkt->size, list->count); - - if (list->count == 1) { - /* - * if the packet count is one, it means the list was empty, - * and no more data will go out this ep until we kick-start - * it again. - */ - kickstart_send_packet(ep); - } - - return pkt->size; -} - -/* - * This routine is called to restart reception of a packet. - * EP spinlock must be held when calling. - */ -static void -kickstart_receive_packet(endpoint_t * ep) -{ - usbdev_pkt_t *pkt; - - // get and link a new packet for next reception - if (!(pkt = add_packet(ep, &ep->outlist, ep->max_pkt_size))) { - err("%s: could not alloc new packet", __FUNCTION__); - return; - } - - if (get_dma_active_buffer(ep->outdma) == 1) { - clear_dma_done1(ep->outdma); - set_dma_count1(ep->outdma, ep->max_pkt_size); - set_dma_count0(ep->outdma, 0); - set_dma_addr1(ep->outdma, virt_to_phys(pkt->payload)); - enable_dma_buffer1(ep->outdma); // reenable - } else { - clear_dma_done0(ep->outdma); - set_dma_count0(ep->outdma, ep->max_pkt_size); - set_dma_count1(ep->outdma, 0); - set_dma_addr0(ep->outdma, virt_to_phys(pkt->payload)); - enable_dma_buffer0(ep->outdma); // reenable - } - if (dma_halted(ep->outdma)) - start_dma(ep->outdma); -} - - -/* - * This routine is called when a packet in the outlist has been - * completed (received) and we need to prepare for a new packet - * to be received. Halts DMA and computes the packet size from the - * remaining DMA counter. Then prepares a new packet for reception - * and restarts DMA. FIXME: what if another packet comes in - * on top of the completed packet? Counter would be wrong. - * EP spinlock must be held when calling. - */ -static usbdev_pkt_t * -receive_packet_complete(endpoint_t * ep) -{ - usbdev_pkt_t *pkt = ep->outlist.tail; - u32 cs; - - halt_dma(ep->outdma); - - cs = au_readl(ep->reg->ctrl_stat); - - if (!pkt) - return NULL; - - pkt->size = ep->max_pkt_size - get_dma_residue(ep->outdma); - if (pkt->size) - dma_cache_inv((unsigned long)pkt->payload, pkt->size); - /* - * need to pull out any remaining bytes in the FIFO. - */ - endpoint_fifo_read(ep); - /* - * should be drained now, but flush anyway just in case. - */ - flush_read_fifo(ep); - - pkt->status = (cs & USBDEV_CS_NAK) ? PKT_STATUS_NAK : PKT_STATUS_ACK; - if (ep->address == 0 && (cs & USBDEV_CS_SU)) - pkt->status |= PKT_STATUS_SU; - - vdbg("%s: ep%d, %s pkt=%p, size=%d", __FUNCTION__, - ep->address, (pkt->status & PKT_STATUS_NAK) ? - "NAK" : "ACK", pkt, pkt->size); - - kickstart_receive_packet(ep); - - return pkt; -} - - -/* - **************************************************************************** - * Here starts the standard device request handlers. They are - * all called by do_setup() via a table of function pointers. - **************************************************************************** - */ - -static ep0_stage_t -do_get_status(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - switch (setup->bRequestType) { - case 0x80: // Device - // FIXME: send device status - break; - case 0x81: // Interface - // FIXME: send interface status - break; - case 0x82: // End Point - // FIXME: send endpoint status - break; - default: - // Invalid Command - endpoint_stall(&dev->ep[0]); // Stall End Point 0 - break; - } - - return STATUS_STAGE; -} - -static ep0_stage_t -do_clear_feature(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - switch (setup->bRequestType) { - case 0x00: // Device - if ((le16_to_cpu(setup->wValue) & 0xff) == 1) - dev->remote_wakeup_en = 0; - else - endpoint_stall(&dev->ep[0]); - break; - case 0x02: // End Point - if ((le16_to_cpu(setup->wValue) & 0xff) == 0) { - endpoint_t *ep = - epaddr_to_ep(dev, - le16_to_cpu(setup->wIndex) & 0xff); - - endpoint_unstall(ep); - endpoint_reset_datatoggle(ep); - } else - endpoint_stall(&dev->ep[0]); - break; - } - - return SETUP_STAGE; -} - -static ep0_stage_t -do_reserved(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - // Invalid request, stall End Point 0 - endpoint_stall(&dev->ep[0]); - return SETUP_STAGE; -} - -static ep0_stage_t -do_set_feature(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - switch (setup->bRequestType) { - case 0x00: // Device - if ((le16_to_cpu(setup->wValue) & 0xff) == 1) - dev->remote_wakeup_en = 1; - else - endpoint_stall(&dev->ep[0]); - break; - case 0x02: // End Point - if ((le16_to_cpu(setup->wValue) & 0xff) == 0) { - endpoint_t *ep = - epaddr_to_ep(dev, - le16_to_cpu(setup->wIndex) & 0xff); - - endpoint_stall(ep); - } else - endpoint_stall(&dev->ep[0]); - break; - } - - return SETUP_STAGE; -} - -static ep0_stage_t -do_set_address(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - int new_state = dev->state; - int new_addr = le16_to_cpu(setup->wValue); - - dbg("%s: our address=%d", __FUNCTION__, new_addr); - - if (new_addr > 127) { - // usb spec doesn't tell us what to do, so just go to - // default state - new_state = DEFAULT; - dev->address = 0; - } else if (dev->address != new_addr) { - dev->address = new_addr; - new_state = ADDRESS; - } - - if (dev->state != new_state) { - dev->state = new_state; - /* inform function layer of usbdev state change */ - dev->func_cb(CB_NEW_STATE, dev->state, dev->cb_data); - } - - return SETUP_STAGE; -} - -static ep0_stage_t -do_get_descriptor(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - int strnum, desc_len = le16_to_cpu(setup->wLength); - - switch (le16_to_cpu(setup->wValue) >> 8) { - case USB_DT_DEVICE: - // send device descriptor! - desc_len = desc_len > dev->dev_desc->bLength ? - dev->dev_desc->bLength : desc_len; - dbg("sending device desc, size=%d", desc_len); - send_packet(dev, alloc_packet(&dev->ep[0], desc_len, - dev->dev_desc), 0); - break; - case USB_DT_CONFIG: - // If the config descr index in low-byte of - // setup->wValue is valid, send config descr, - // otherwise stall ep0. - if ((le16_to_cpu(setup->wValue) & 0xff) == 0) { - // send config descriptor! - if (desc_len <= USB_DT_CONFIG_SIZE) { - dbg("sending partial config desc, size=%d", - desc_len); - send_packet(dev, - alloc_packet(&dev->ep[0], - desc_len, - dev->conf_desc), - 0); - } else { - int len = le16_to_cpu(dev->conf_desc->wTotalLength); - dbg("sending whole config desc," - " size=%d, our size=%d", desc_len, len); - desc_len = desc_len > len ? len : desc_len; - send_packet(dev, - alloc_packet(&dev->ep[0], - desc_len, - dev->full_conf_desc), - 0); - } - } else - endpoint_stall(&dev->ep[0]); - break; - case USB_DT_STRING: - // If the string descr index in low-byte of setup->wValue - // is valid, send string descr, otherwise stall ep0. - strnum = le16_to_cpu(setup->wValue) & 0xff; - if (strnum >= 0 && strnum < 6) { - struct usb_string_descriptor *desc = - dev->str_desc[strnum]; - desc_len = desc_len > desc->bLength ? - desc->bLength : desc_len; - dbg("sending string desc %d", strnum); - send_packet(dev, - alloc_packet(&dev->ep[0], desc_len, - desc), 0); - } else - endpoint_stall(&dev->ep[0]); - break; - default: - // Invalid request - err("invalid get desc=%d, stalled", - le16_to_cpu(setup->wValue) >> 8); - endpoint_stall(&dev->ep[0]); // Stall endpoint 0 - break; - } - - return STATUS_STAGE; -} - -static ep0_stage_t -do_set_descriptor(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - // TODO: implement - // there will be an OUT data stage (the descriptor to set) - return DATA_STAGE; -} - -static ep0_stage_t -do_get_configuration(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - // send dev->configuration - dbg("sending config"); - send_packet(dev, alloc_packet(&dev->ep[0], 1, &dev->configuration), - 0); - return STATUS_STAGE; -} - -static ep0_stage_t -do_set_configuration(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - // set active config to low-byte of setup->wValue - dev->configuration = le16_to_cpu(setup->wValue) & 0xff; - dbg("set config, config=%d", dev->configuration); - if (!dev->configuration && dev->state > DEFAULT) { - dev->state = ADDRESS; - /* inform function layer of usbdev state change */ - dev->func_cb(CB_NEW_STATE, dev->state, dev->cb_data); - } else if (dev->configuration == 1) { - dev->state = CONFIGURED; - /* inform function layer of usbdev state change */ - dev->func_cb(CB_NEW_STATE, dev->state, dev->cb_data); - } else { - // FIXME: "respond with request error" - how? - } - - return SETUP_STAGE; -} - -static ep0_stage_t -do_get_interface(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - // interface must be zero. - if ((le16_to_cpu(setup->wIndex) & 0xff) || dev->state == ADDRESS) { - // FIXME: respond with "request error". how? - } else if (dev->state == CONFIGURED) { - // send dev->alternate_setting - dbg("sending alt setting"); - send_packet(dev, alloc_packet(&dev->ep[0], 1, - &dev->alternate_setting), 0); - } - - return STATUS_STAGE; - -} - -static ep0_stage_t -do_set_interface(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - if (dev->state == ADDRESS) { - // FIXME: respond with "request error". how? - } else if (dev->state == CONFIGURED) { - dev->interface = le16_to_cpu(setup->wIndex) & 0xff; - dev->alternate_setting = - le16_to_cpu(setup->wValue) & 0xff; - // interface and alternate_setting must be zero - if (dev->interface || dev->alternate_setting) { - // FIXME: respond with "request error". how? - } - } - - return SETUP_STAGE; -} - -static ep0_stage_t -do_synch_frame(struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - // TODO - return SETUP_STAGE; -} - -typedef ep0_stage_t (*req_method_t)(struct usb_dev* dev, - struct usb_ctrlrequest* setup); - - -/* Table of the standard device request handlers */ -static const req_method_t req_method[] = { - do_get_status, - do_clear_feature, - do_reserved, - do_set_feature, - do_reserved, - do_set_address, - do_get_descriptor, - do_set_descriptor, - do_get_configuration, - do_set_configuration, - do_get_interface, - do_set_interface, - do_synch_frame -}; - - -// SETUP packet request dispatcher -static void -do_setup (struct usb_dev* dev, struct usb_ctrlrequest* setup) -{ - req_method_t m; - - dbg("%s: req %d %s", __FUNCTION__, setup->bRequestType, - get_std_req_name(setup->bRequestType)); - - if ((setup->bRequestType & USB_TYPE_MASK) != USB_TYPE_STANDARD || - (setup->bRequestType & USB_RECIP_MASK) != USB_RECIP_DEVICE) { - err("%s: invalid requesttype 0x%02x", __FUNCTION__, - setup->bRequestType); - return; - } - - if ((setup->bRequestType & 0x80) == USB_DIR_OUT && setup->wLength) - dbg("%s: OUT phase! length=%d", __FUNCTION__, setup->wLength); - - if (setup->bRequestType < sizeof(req_method)/sizeof(req_method_t)) - m = req_method[setup->bRequestType]; - else - m = do_reserved; - - dev->ep0_stage = (*m)(dev, setup); -} - -/* - * A SETUP, DATA0, or DATA1 packet has been received - * on the default control endpoint's fifo. - */ -static void -process_ep0_receive (struct usb_dev* dev) -{ - endpoint_t *ep0 = &dev->ep[0]; - usbdev_pkt_t *pkt; - - spin_lock(&ep0->lock); - - // complete packet and prepare a new packet - pkt = receive_packet_complete(ep0); - if (!pkt) { - // FIXME: should put a warn/err here. - spin_unlock(&ep0->lock); - return; - } - - // unlink immediately from endpoint. - unlink_head(&ep0->outlist); - - // override current stage if h/w says it's a setup packet - if (pkt->status & PKT_STATUS_SU) - dev->ep0_stage = SETUP_STAGE; - - switch (dev->ep0_stage) { - case SETUP_STAGE: - vdbg("SU bit is %s in setup stage", - (pkt->status & PKT_STATUS_SU) ? "set" : "not set"); - - if (pkt->size == sizeof(struct usb_ctrlrequest)) { -#ifdef VDEBUG - if (pkt->status & PKT_STATUS_ACK) - vdbg("received SETUP"); - else - vdbg("received NAK SETUP"); -#endif - do_setup(dev, (struct usb_ctrlrequest*)pkt->payload); - } else - err("%s: wrong size SETUP received", __FUNCTION__); - break; - case DATA_STAGE: - /* - * this setup has an OUT data stage. Of the standard - * device requests, only set_descriptor has this stage, - * so this packet is that descriptor. TODO: drop it for - * now, set_descriptor not implemented. - * - * Need to place a byte in the write FIFO here, to prepare - * to send a zero-length DATA ack packet to the host in the - * STATUS stage. - */ - au_writel(0, ep0->reg->write_fifo); - dbg("received OUT stage DATAx on EP0, size=%d", pkt->size); - dev->ep0_stage = SETUP_STAGE; - break; - case STATUS_STAGE: - // this setup had an IN data stage, and host is ACK'ing - // the packet we sent during that stage. - if (pkt->size != 0) - warn("received non-zero ACK on EP0??"); -#ifdef VDEBUG - else - vdbg("received ACK on EP0"); -#endif - dev->ep0_stage = SETUP_STAGE; - break; - } - - spin_unlock(&ep0->lock); - // we're done processing the packet, free it - kfree(pkt); -} - - -/* - * A DATA0/1 packet has been received on one of the OUT endpoints (4 or 5) - */ -static void -process_ep_receive (struct usb_dev* dev, endpoint_t *ep) -{ - usbdev_pkt_t *pkt; - - spin_lock(&ep->lock); - pkt = receive_packet_complete(ep); - spin_unlock(&ep->lock); - - dev->func_cb(CB_PKT_COMPLETE, (unsigned long)pkt, dev->cb_data); -} - - - -/* This ISR handles the receive complete and suspend events */ -static void -req_sus_intr (int irq, void *dev_id, struct pt_regs *regs) -{ - struct usb_dev *dev = (struct usb_dev *) dev_id; - u32 status; - - status = au_readl(USBD_INTSTAT); - au_writel(status, USBD_INTSTAT); // ack'em - - if (status & (1<<0)) - process_ep0_receive(dev); - if (status & (1<<4)) - process_ep_receive(dev, &dev->ep[4]); - if (status & (1<<5)) - process_ep_receive(dev, &dev->ep[5]); -} - - -/* This ISR handles the DMA done events on EP0 */ -static void -dma_done_ep0_intr(int irq, void *dev_id, struct pt_regs *regs) -{ - struct usb_dev *dev = (struct usb_dev *) dev_id; - usbdev_pkt_t* pkt; - endpoint_t *ep0 = &dev->ep[0]; - u32 cs0, buff_done; - - spin_lock(&ep0->lock); - cs0 = au_readl(ep0->reg->ctrl_stat); - - // first check packet transmit done - if ((buff_done = get_dma_buffer_done(ep0->indma)) != 0) { - // transmitted a DATAx packet during DATA stage - // on control endpoint 0 - // clear DMA done bit - if (buff_done & DMA_D0) - clear_dma_done0(ep0->indma); - if (buff_done & DMA_D1) - clear_dma_done1(ep0->indma); - - pkt = send_packet_complete(ep0); - kfree(pkt); - } - - /* - * Now check packet receive done. Shouldn't get these, - * the receive packet complete intr should happen - * before the DMA done intr occurs. - */ - if ((buff_done = get_dma_buffer_done(ep0->outdma)) != 0) { - // clear DMA done bit - if (buff_done & DMA_D0) - clear_dma_done0(ep0->outdma); - if (buff_done & DMA_D1) - clear_dma_done1(ep0->outdma); - - //process_ep0_receive(dev); - } - - spin_unlock(&ep0->lock); -} - -/* This ISR handles the DMA done events on endpoints 2,3,4,5 */ -static void -dma_done_ep_intr(int irq, void *dev_id, struct pt_regs *regs) -{ - struct usb_dev *dev = (struct usb_dev *) dev_id; - int i; - - for (i = 2; i < 6; i++) { - u32 buff_done; - usbdev_pkt_t* pkt; - endpoint_t *ep = &dev->ep[i]; - - if (!ep->active) continue; - - spin_lock(&ep->lock); - - if (ep->direction == USB_DIR_IN) { - buff_done = get_dma_buffer_done(ep->indma); - if (buff_done != 0) { - // transmitted a DATAx pkt on the IN ep - // clear DMA done bit - if (buff_done & DMA_D0) - clear_dma_done0(ep->indma); - if (buff_done & DMA_D1) - clear_dma_done1(ep->indma); - - pkt = send_packet_complete(ep); - - spin_unlock(&ep->lock); - dev->func_cb(CB_PKT_COMPLETE, - (unsigned long)pkt, - dev->cb_data); - spin_lock(&ep->lock); - } - } else { - /* - * Check packet receive done (OUT ep). Shouldn't get - * these, the rx packet complete intr should happen - * before the DMA done intr occurs. - */ - buff_done = get_dma_buffer_done(ep->outdma); - if (buff_done != 0) { - // received a DATAx pkt on the OUT ep - // clear DMA done bit - if (buff_done & DMA_D0) - clear_dma_done0(ep->outdma); - if (buff_done & DMA_D1) - clear_dma_done1(ep->outdma); - - //process_ep_receive(dev, ep); - } - } - - spin_unlock(&ep->lock); - } -} - - -/*************************************************************************** - * Here begins the external interface functions - *************************************************************************** - */ - -/* - * allocate a new packet - */ -int -usbdev_alloc_packet(int ep_addr, int data_size, usbdev_pkt_t** pkt) -{ - endpoint_t * ep = epaddr_to_ep(&usbdev, ep_addr); - usbdev_pkt_t* lpkt = NULL; - - if (!ep || !ep->active || ep->address < 2) - return -ENODEV; - if (data_size > ep->max_pkt_size) - return -EINVAL; - - lpkt = *pkt = alloc_packet(ep, data_size, NULL); - if (!lpkt) - return -ENOMEM; - return 0; -} - - -/* - * packet send - */ -int -usbdev_send_packet(int ep_addr, usbdev_pkt_t * pkt) -{ - unsigned long flags; - int count; - endpoint_t * ep; - - if (!pkt || !(ep = epaddr_to_ep(&usbdev, pkt->ep_addr)) || - !ep->active || ep->address < 2) - return -ENODEV; - if (ep->direction != USB_DIR_IN) - return -EINVAL; - - spin_lock_irqsave(&ep->lock, flags); - count = send_packet(&usbdev, pkt, 1); - spin_unlock_irqrestore(&ep->lock, flags); - - return count; -} - -/* - * packet receive - */ -int -usbdev_receive_packet(int ep_addr, usbdev_pkt_t** pkt) -{ - unsigned long flags; - usbdev_pkt_t* lpkt = NULL; - endpoint_t *ep = epaddr_to_ep(&usbdev, ep_addr); - - if (!ep || !ep->active || ep->address < 2) - return -ENODEV; - if (ep->direction != USB_DIR_OUT) - return -EINVAL; - - spin_lock_irqsave(&ep->lock, flags); - if (ep->outlist.count > 1) - lpkt = unlink_head(&ep->outlist); - spin_unlock_irqrestore(&ep->lock, flags); - - if (!lpkt) { - /* no packet available */ - *pkt = NULL; - return -ENODATA; - } - - *pkt = lpkt; - - return lpkt->size; -} - - -/* - * return total queued byte count on the endpoint. - */ -int -usbdev_get_byte_count(int ep_addr) -{ - unsigned long flags; - pkt_list_t *list; - usbdev_pkt_t *scan; - int count = 0; - endpoint_t * ep = epaddr_to_ep(&usbdev, ep_addr); - - if (!ep || !ep->active || ep->address < 2) - return -ENODEV; - - if (ep->direction == USB_DIR_IN) { - list = &ep->inlist; - - spin_lock_irqsave(&ep->lock, flags); - for (scan = list->head; scan; scan = scan->next) - count += scan->size; - spin_unlock_irqrestore(&ep->lock, flags); - } else { - list = &ep->outlist; - - spin_lock_irqsave(&ep->lock, flags); - if (list->count > 1) { - for (scan = list->head; scan != list->tail; - scan = scan->next) - count += scan->size; - } - spin_unlock_irqrestore(&ep->lock, flags); - } - - return count; -} - - -void -usbdev_exit(void) -{ - endpoint_t *ep; - int i; - - au_writel(0, USBD_INTEN); // disable usb dev ints - au_writel(0, USBD_ENABLE); // disable usb dev - - free_irq(AU1000_USB_DEV_REQ_INT, &usbdev); - free_irq(AU1000_USB_DEV_SUS_INT, &usbdev); - - // free all control endpoint resources - ep = &usbdev.ep[0]; - free_au1000_dma(ep->indma); - free_au1000_dma(ep->outdma); - endpoint_flush(ep); - - // free ep resources - for (i = 2; i < 6; i++) { - ep = &usbdev.ep[i]; - if (!ep->active) continue; - - if (ep->direction == USB_DIR_IN) { - free_au1000_dma(ep->indma); - } else { - free_au1000_dma(ep->outdma); - } - endpoint_flush(ep); - } - - kfree(usbdev.full_conf_desc); -} - -int -usbdev_init(struct usb_device_descriptor* dev_desc, - struct usb_config_descriptor* config_desc, - struct usb_interface_descriptor* if_desc, - struct usb_endpoint_descriptor* ep_desc, - struct usb_string_descriptor* str_desc[], - void (*cb)(usbdev_cb_type_t, unsigned long, void *), - void* cb_data) -{ - endpoint_t *ep0; - int i, ret=0; - u8* fcd; - - if (dev_desc->bNumConfigurations > 1 || - config_desc->bNumInterfaces > 1 || - if_desc->bNumEndpoints > 4) { - err("Only one config, one i/f, and no more " - "than 4 ep's allowed"); - ret = -EINVAL; - goto out; - } - - if (!cb) { - err("Function-layer callback required"); - ret = -EINVAL; - goto out; - } - - if (dev_desc->bMaxPacketSize0 != USBDEV_EP0_MAX_PACKET_SIZE) { - warn("EP0 Max Packet size must be %d", - USBDEV_EP0_MAX_PACKET_SIZE); - dev_desc->bMaxPacketSize0 = USBDEV_EP0_MAX_PACKET_SIZE; - } - - memset(&usbdev, 0, sizeof(struct usb_dev)); - - usbdev.state = DEFAULT; - usbdev.dev_desc = dev_desc; - usbdev.if_desc = if_desc; - usbdev.conf_desc = config_desc; - for (i=0; i<6; i++) - usbdev.str_desc[i] = str_desc[i]; - usbdev.func_cb = cb; - usbdev.cb_data = cb_data; - - /* Initialize default control endpoint */ - ep0 = &usbdev.ep[0]; - ep0->active = 1; - ep0->type = CONTROL_EP; - ep0->max_pkt_size = USBDEV_EP0_MAX_PACKET_SIZE; - spin_lock_init(&ep0->lock); - ep0->desc = NULL; // ep0 has no descriptor - ep0->address = 0; - ep0->direction = 0; - ep0->reg = &ep_reg[0]; - - /* Initialize the other requested endpoints */ - for (i = 0; i < if_desc->bNumEndpoints; i++) { - struct usb_endpoint_descriptor* epd = &ep_desc[i]; - endpoint_t *ep; - - if ((epd->bEndpointAddress & 0x80) == USB_DIR_IN) { - ep = &usbdev.ep[2]; - ep->address = 2; - if (ep->active) { - ep = &usbdev.ep[3]; - ep->address = 3; - if (ep->active) { - err("too many IN ep's requested"); - ret = -ENODEV; - goto out; - } - } - } else { - ep = &usbdev.ep[4]; - ep->address = 4; - if (ep->active) { - ep = &usbdev.ep[5]; - ep->address = 5; - if (ep->active) { - err("too many OUT ep's requested"); - ret = -ENODEV; - goto out; - } - } - } - - ep->active = 1; - epd->bEndpointAddress &= ~0x0f; - epd->bEndpointAddress |= (u8)ep->address; - ep->direction = epd->bEndpointAddress & 0x80; - ep->type = epd->bmAttributes & 0x03; - ep->max_pkt_size = le16_to_cpu(epd->wMaxPacketSize); - spin_lock_init(&ep->lock); - ep->desc = epd; - ep->reg = &ep_reg[ep->address]; - } - - /* - * initialize the full config descriptor - */ - usbdev.full_conf_desc = fcd = kmalloc(le16_to_cpu(config_desc->wTotalLength), - ALLOC_FLAGS); - if (!fcd) { - err("failed to alloc full config descriptor"); - ret = -ENOMEM; - goto out; - } - - memcpy(fcd, config_desc, USB_DT_CONFIG_SIZE); - fcd += USB_DT_CONFIG_SIZE; - memcpy(fcd, if_desc, USB_DT_INTERFACE_SIZE); - fcd += USB_DT_INTERFACE_SIZE; - for (i = 0; i < if_desc->bNumEndpoints; i++) { - memcpy(fcd, &ep_desc[i], USB_DT_ENDPOINT_SIZE); - fcd += USB_DT_ENDPOINT_SIZE; - } - - /* Now we're ready to enable the controller */ - au_writel(0x0002, USBD_ENABLE); - udelay(100); - au_writel(0x0003, USBD_ENABLE); - udelay(100); - - /* build and send config table based on ep descriptors */ - for (i = 0; i < 6; i++) { - endpoint_t *ep; - if (i == 1) - continue; // skip dummy ep - ep = &usbdev.ep[i]; - if (ep->active) { - au_writel((ep->address << 4) | 0x04, USBD_CONFIG); - au_writel(((ep->max_pkt_size & 0x380) >> 7) | - (ep->direction >> 4) | (ep->type << 4), - USBD_CONFIG); - au_writel((ep->max_pkt_size & 0x7f) << 1, USBD_CONFIG); - au_writel(0x00, USBD_CONFIG); - au_writel(ep->address, USBD_CONFIG); - } else { - u8 dir = (i==2 || i==3) ? DIR_IN : DIR_OUT; - au_writel((i << 4) | 0x04, USBD_CONFIG); - au_writel(((16 & 0x380) >> 7) | dir | - (BULK_EP << 4), USBD_CONFIG); - au_writel((16 & 0x7f) << 1, USBD_CONFIG); - au_writel(0x00, USBD_CONFIG); - au_writel(i, USBD_CONFIG); - } - } - - /* - * Enable Receive FIFO Complete interrupts only. Transmit - * complete is being handled by the DMA done interrupts. - */ - au_writel(0x31, USBD_INTEN); - - /* - * Controller is now enabled, request DMA and IRQ - * resources. - */ - - /* request the USB device transfer complete interrupt */ - if (request_irq(AU1000_USB_DEV_REQ_INT, req_sus_intr, IRQF_DISABLED, - "USBdev req", &usbdev)) { - err("Can't get device request intr"); - ret = -ENXIO; - goto out; - } - /* request the USB device suspend interrupt */ - if (request_irq(AU1000_USB_DEV_SUS_INT, req_sus_intr, IRQF_DISABLED, - "USBdev sus", &usbdev)) { - err("Can't get device suspend intr"); - ret = -ENXIO; - goto out; - } - - /* Request EP0 DMA and IRQ */ - if ((ep0->indma = request_au1000_dma(ep_dma_id[0].id, - ep_dma_id[0].str, - dma_done_ep0_intr, - IRQF_DISABLED, - &usbdev)) < 0) { - err("Can't get %s DMA", ep_dma_id[0].str); - ret = -ENXIO; - goto out; - } - if ((ep0->outdma = request_au1000_dma(ep_dma_id[1].id, - ep_dma_id[1].str, - NULL, 0, NULL)) < 0) { - err("Can't get %s DMA", ep_dma_id[1].str); - ret = -ENXIO; - goto out; - } - - // Flush the ep0 buffers and FIFOs - endpoint_flush(ep0); - // start packet reception on ep0 - kickstart_receive_packet(ep0); - - /* Request DMA and IRQ for the other endpoints */ - for (i = 2; i < 6; i++) { - endpoint_t *ep = &usbdev.ep[i]; - if (!ep->active) - continue; - - // Flush the endpoint buffers and FIFOs - endpoint_flush(ep); - - if (ep->direction == USB_DIR_IN) { - ep->indma = - request_au1000_dma(ep_dma_id[ep->address].id, - ep_dma_id[ep->address].str, - dma_done_ep_intr, - IRQF_DISABLED, - &usbdev); - if (ep->indma < 0) { - err("Can't get %s DMA", - ep_dma_id[ep->address].str); - ret = -ENXIO; - goto out; - } - } else { - ep->outdma = - request_au1000_dma(ep_dma_id[ep->address].id, - ep_dma_id[ep->address].str, - NULL, 0, NULL); - if (ep->outdma < 0) { - err("Can't get %s DMA", - ep_dma_id[ep->address].str); - ret = -ENXIO; - goto out; - } - - // start packet reception on OUT endpoint - kickstart_receive_packet(ep); - } - } - - out: - if (ret) - usbdev_exit(); - return ret; -} - -EXPORT_SYMBOL(usbdev_init); -EXPORT_SYMBOL(usbdev_exit); -EXPORT_SYMBOL(usbdev_alloc_packet); -EXPORT_SYMBOL(usbdev_receive_packet); -EXPORT_SYMBOL(usbdev_send_packet); -EXPORT_SYMBOL(usbdev_get_byte_count); diff --git a/arch/mips/au1000/db1x00/board_setup.c b/arch/mips/au1000/db1x00/board_setup.c index 7a79293f8527..8b08edb977be 100644 --- a/arch/mips/au1000/db1x00/board_setup.c +++ b/arch/mips/au1000/db1x00/board_setup.c @@ -58,11 +58,6 @@ void __init board_setup(void) pin_func = 0; /* not valid for 1550 */ -#ifdef CONFIG_AU1X00_USB_DEVICE - // 2nd USB port is USB device - pin_func = au_readl(SYS_PINFUNC) & (u32)(~0x8000); - au_writel(pin_func, SYS_PINFUNC); -#endif #if defined(CONFIG_IRDA) && (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100)) /* set IRFIRSEL instead of GPIO15 */ diff --git a/arch/mips/au1000/mtx-1/board_setup.c b/arch/mips/au1000/mtx-1/board_setup.c index e917e54fc683..13f9bf5f91a6 100644 --- a/arch/mips/au1000/mtx-1/board_setup.c +++ b/arch/mips/au1000/mtx-1/board_setup.c @@ -51,15 +51,11 @@ void board_reset (void) void __init board_setup(void) { -#if defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) -#ifdef CONFIG_AU1X00_USB_DEVICE - // 2nd USB port is USB device - au_writel(au_readl(SYS_PINFUNC) & (u32)(~0x8000), SYS_PINFUNC); -#endif +#ifdef CONFIG_USB_OHCI // enable USB power switch au_writel( au_readl(GPIO2_DIR) | 0x10, GPIO2_DIR ); au_writel( 0x100000, GPIO2_OUTPUT ); -#endif // defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) +#endif // defined (CONFIG_USB_OHCI) #ifdef CONFIG_PCI #if defined(__MIPSEB__) diff --git a/arch/mips/au1000/pb1000/board_setup.c b/arch/mips/au1000/pb1000/board_setup.c index 1cf18e16ab54..824cfafaff92 100644 --- a/arch/mips/au1000/pb1000/board_setup.c +++ b/arch/mips/au1000/pb1000/board_setup.c @@ -54,7 +54,7 @@ void __init board_setup(void) au_writel(0, SYS_PINSTATERD); udelay(100); -#if defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) +#ifdef CONFIG_USB_OHCI /* zero and disable FREQ2 */ sys_freqctrl = au_readl(SYS_FREQCTRL0); sys_freqctrl &= ~0xFFF00000; @@ -105,22 +105,18 @@ void __init board_setup(void) #ifdef CONFIG_USB_OHCI sys_clksrc |= ((4<<12) | (0<<11) | (0<<10)); #endif -#ifdef CONFIG_AU1X00_USB_DEVICE - sys_clksrc |= ((4<<7) | (0<<6) | (0<<5)); -#endif au_writel(sys_clksrc, SYS_CLKSRC); // configure pins GPIO[14:9] as GPIO pin_func = au_readl(SYS_PINFUNC) & (u32)(~0x8080); -#ifndef CONFIG_AU1X00_USB_DEVICE // 2nd USB port is USB host pin_func |= 0x8000; -#endif + au_writel(pin_func, SYS_PINFUNC); au_writel(0x2800, SYS_TRIOUTCLR); au_writel(0x0030, SYS_OUTPUTCLR); -#endif // defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) +#endif // defined (CONFIG_USB_OHCI) // make gpio 15 an input (for interrupt line) pin_func = au_readl(SYS_PINFUNC) & (u32)(~0x100); diff --git a/arch/mips/au1000/pb1100/board_setup.c b/arch/mips/au1000/pb1100/board_setup.c index db27b9331ff3..2d1533f116c0 100644 --- a/arch/mips/au1000/pb1100/board_setup.c +++ b/arch/mips/au1000/pb1100/board_setup.c @@ -55,7 +55,7 @@ void __init board_setup(void) au_writel(0, SYS_PININPUTEN); udelay(100); -#if defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) +#ifdef CONFIG_USB_OHCI // configure pins GPIO[14:9] as GPIO pin_func = au_readl(SYS_PINFUNC) & (u32)(~0x80); @@ -92,12 +92,10 @@ void __init board_setup(void) // get USB Functionality pin state (device vs host drive pins) pin_func = au_readl(SYS_PINFUNC) & (u32)(~0x8000); -#ifndef CONFIG_AU1X00_USB_DEVICE // 2nd USB port is USB host pin_func |= 0x8000; -#endif au_writel(pin_func, SYS_PINFUNC); -#endif // defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) +#endif // defined (CONFIG_USB_OHCI) /* Enable sys bus clock divider when IDLE state or no bus activity. */ au_writel(au_readl(SYS_POWERCTRL) | (0x3 << 5), SYS_POWERCTRL); diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c index f66779f0d4cd..91983ba407c4 100644 --- a/arch/mips/au1000/pb1200/irqmap.c +++ b/arch/mips/au1000/pb1200/irqmap.c @@ -65,7 +65,7 @@ int __initdata au1xxx_nr_irqs = ARRAY_SIZE(au1xxx_irq_map); */ static volatile int pb1200_cascade_en=0; -irqreturn_t pb1200_cascade_handler( int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t pb1200_cascade_handler( int irq, void *dev_id) { unsigned short bisr = bcsr->int_status; int extirq_nr = 0; @@ -76,8 +76,9 @@ irqreturn_t pb1200_cascade_handler( int irq, void *dev_id, struct pt_regs *regs) { extirq_nr = (PB1200_INT_BEGIN-1) + au_ffs(bisr); /* Ack and dispatch IRQ */ - do_IRQ(extirq_nr,regs); + do_IRQ(extirq_nr); } + return IRQ_RETVAL(1); } diff --git a/arch/mips/au1000/pb1500/board_setup.c b/arch/mips/au1000/pb1500/board_setup.c index 1a9a293de6ab..0ffdb4fd575b 100644 --- a/arch/mips/au1000/pb1500/board_setup.c +++ b/arch/mips/au1000/pb1500/board_setup.c @@ -56,7 +56,7 @@ void __init board_setup(void) au_writel(0, SYS_PINSTATERD); udelay(100); -#if defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) +#ifdef CONFIG_USB_OHCI /* GPIO201 is input for PCMCIA card detect */ /* GPIO203 is input for PCMCIA interrupt request */ @@ -88,19 +88,14 @@ void __init board_setup(void) #ifdef CONFIG_USB_OHCI sys_clksrc |= ((4<<12) | (0<<11) | (0<<10)); #endif -#ifdef CONFIG_AU1X00_USB_DEVICE - sys_clksrc |= ((4<<7) | (0<<6) | (0<<5)); -#endif au_writel(sys_clksrc, SYS_CLKSRC); pin_func = au_readl(SYS_PINFUNC) & (u32)(~0x8000); -#ifndef CONFIG_AU1X00_USB_DEVICE // 2nd USB port is USB host pin_func |= 0x8000; -#endif au_writel(pin_func, SYS_PINFUNC); -#endif // defined (CONFIG_USB_OHCI) || defined (CONFIG_AU1X00_USB_DEVICE) +#endif // defined (CONFIG_USB_OHCI) diff --git a/arch/mips/basler/excite/excite_dbg_io.c b/arch/mips/basler/excite/excite_dbg_io.c index c04505afa47f..d289e3a868cf 100644 --- a/arch/mips/basler/excite/excite_dbg_io.c +++ b/arch/mips/basler/excite/excite_dbg_io.c @@ -112,7 +112,7 @@ int putDebugChar(int data) } /* KGDB interrupt handler */ -asmlinkage void excite_kgdb_inthdl(struct pt_regs *regs) +asmlinkage void excite_kgdb_inthdl(void) { if (unlikely( ((titan_readl(UAIIR) & 0x7) == 4) diff --git a/arch/mips/basler/excite/excite_iodev.c b/arch/mips/basler/excite/excite_iodev.c index 10bbb8cfb964..6af0b21ebc32 100644 --- a/arch/mips/basler/excite/excite_iodev.c +++ b/arch/mips/basler/excite/excite_iodev.c @@ -38,7 +38,7 @@ static int iodev_open(struct inode *, struct file *); static int iodev_release(struct inode *, struct file *); static ssize_t iodev_read(struct file *, char __user *, size_t s, loff_t *); static unsigned int iodev_poll(struct file *, struct poll_table_struct *); -static irqreturn_t iodev_irqhdl(int, void *, struct pt_regs *); +static irqreturn_t iodev_irqhdl(int, void *); @@ -108,16 +108,12 @@ static int __exit iodev_remove(struct device *dev) return misc_deregister(&miscdev); } - - static int iodev_open(struct inode *i, struct file *f) { return request_irq(iodev_irq, iodev_irqhdl, IRQF_DISABLED, iodev_name, &miscdev); } - - static int iodev_release(struct inode *i, struct file *f) { free_irq(iodev_irq, &miscdev); @@ -148,17 +144,13 @@ static unsigned int iodev_poll(struct file *f, struct poll_table_struct *p) return POLLOUT | POLLWRNORM; } - - - -static irqreturn_t iodev_irqhdl(int irq, void *ctxt, struct pt_regs *regs) +static irqreturn_t iodev_irqhdl(int irq, void *ctxt) { wake_up(&wq); + return IRQ_HANDLED; } - - static int __init iodev_init_module(void) { return driver_register(&iodev_driver); diff --git a/arch/mips/basler/excite/excite_irq.c b/arch/mips/basler/excite/excite_irq.c index 511ad8730f54..2e2061a286c5 100644 --- a/arch/mips/basler/excite/excite_irq.c +++ b/arch/mips/basler/excite/excite_irq.c @@ -56,7 +56,7 @@ void __init arch_init_irq(void) #endif } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { const u32 interrupts = read_c0_cause() >> 8, @@ -67,7 +67,7 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) /* process timer interrupt */ if (pending & (1 << TIMER_IRQ)) { - do_IRQ(TIMER_IRQ, regs); + do_IRQ(TIMER_IRQ); return; } @@ -80,7 +80,7 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) #else if (pending & (1 << USB_IRQ)) { #endif - do_IRQ(USB_IRQ, regs); + do_IRQ(USB_IRQ); return; } @@ -91,9 +91,9 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) if ((pending & (1 << TITAN_IRQ)) && msgint) { ocd_writel(msgint, INTP0Clear0 + (TITAN_MSGINT / 0x20 * 0x10)); #if defined(CONFIG_KGDB) - excite_kgdb_inthdl(regs); + excite_kgdb_inthdl(); #endif - do_IRQ(TITAN_IRQ, regs); + do_IRQ(TITAN_IRQ); return; } @@ -102,7 +102,7 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) msgintmask = ocd_readl(INTP0Mask0 + (FPGA0_MSGINT / 0x20 * 0x10)); msgint = msgintflags & msgintmask & (0x1 << (FPGA0_MSGINT % 0x20)); if ((pending & (1 << FPGA0_IRQ)) && msgint) { - do_IRQ(FPGA0_IRQ, regs); + do_IRQ(FPGA0_IRQ); return; } @@ -111,7 +111,7 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) msgintmask = ocd_readl(INTP0Mask0 + (FPGA1_MSGINT / 0x20 * 0x10)); msgint = msgintflags & msgintmask & (0x1 << (FPGA1_MSGINT % 0x20)); if ((pending & (1 << FPGA1_IRQ)) && msgint) { - do_IRQ(FPGA1_IRQ, regs); + do_IRQ(FPGA1_IRQ); return; } @@ -120,10 +120,10 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) msgintmask = ocd_readl(INTP0Mask0 + (PHY_MSGINT / 0x20 * 0x10)); msgint = msgintflags & msgintmask & (0x1 << (PHY_MSGINT % 0x20)); if ((pending & (1 << PHY_IRQ)) && msgint) { - do_IRQ(PHY_IRQ, regs); + do_IRQ(PHY_IRQ); return; } /* Process spurious interrupts */ - spurious_interrupt(regs); + spurious_interrupt(); } diff --git a/arch/mips/cobalt/irq.c b/arch/mips/cobalt/irq.c index 0b75f4fb7195..82e569d5b02c 100644 --- a/arch/mips/cobalt/irq.c +++ b/arch/mips/cobalt/irq.c @@ -16,7 +16,6 @@ #include <asm/i8259.h> #include <asm/irq_cpu.h> #include <asm/gt64120.h> -#include <asm/ptrace.h> #include <asm/mach-cobalt/cobalt.h> @@ -42,7 +41,7 @@ * 15 - IDE1 */ -static inline void galileo_irq(struct pt_regs *regs) +static inline void galileo_irq(void) { unsigned int mask, pending, devfn; @@ -52,7 +51,7 @@ static inline void galileo_irq(struct pt_regs *regs) if (pending & GALILEO_INTR_T0EXP) { GALILEO_OUTL(~GALILEO_INTR_T0EXP, GT_INTRCAUSE_OFS); - do_IRQ(COBALT_GALILEO_IRQ, regs); + do_IRQ(COBALT_GALILEO_IRQ); } else if (pending & GALILEO_INTR_RETRY_CTR) { @@ -68,44 +67,31 @@ static inline void galileo_irq(struct pt_regs *regs) } } -static inline void via_pic_irq(struct pt_regs *regs) +static inline void via_pic_irq(void) { int irq; irq = i8259_irq(); if (irq >= 0) - do_IRQ(irq, regs); + do_IRQ(irq); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { - unsigned pending; - - pending = read_c0_status() & read_c0_cause(); - - if (pending & CAUSEF_IP2) /* COBALT_GALILEO_IRQ (18) */ - - galileo_irq(regs); - - else if (pending & CAUSEF_IP6) /* COBALT_VIA_IRQ (22) */ - - via_pic_irq(regs); - - else if (pending & CAUSEF_IP3) /* COBALT_ETH0_IRQ (19) */ - - do_IRQ(COBALT_CPU_IRQ + 3, regs); - - else if (pending & CAUSEF_IP4) /* COBALT_ETH1_IRQ (20) */ - - do_IRQ(COBALT_CPU_IRQ + 4, regs); - - else if (pending & CAUSEF_IP5) /* COBALT_SERIAL_IRQ (21) */ - - do_IRQ(COBALT_CPU_IRQ + 5, regs); - - else if (pending & CAUSEF_IP7) /* IRQ 23 */ - - do_IRQ(COBALT_CPU_IRQ + 7, regs); + unsigned pending = read_c0_status() & read_c0_cause(); + + if (pending & CAUSEF_IP2) /* COBALT_GALILEO_IRQ (18) */ + galileo_irq(); + else if (pending & CAUSEF_IP6) /* COBALT_VIA_IRQ (22) */ + via_pic_irq(); + else if (pending & CAUSEF_IP3) /* COBALT_ETH0_IRQ (19) */ + do_IRQ(COBALT_CPU_IRQ + 3); + else if (pending & CAUSEF_IP4) /* COBALT_ETH1_IRQ (20) */ + do_IRQ(COBALT_CPU_IRQ + 4); + else if (pending & CAUSEF_IP5) /* COBALT_SERIAL_IRQ (21) */ + do_IRQ(COBALT_CPU_IRQ + 5); + else if (pending & CAUSEF_IP7) /* IRQ 23 */ + do_IRQ(COBALT_CPU_IRQ + 7); } static struct irqaction irq_via = { diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c index 0b347cffc768..bf9dc72b9720 100644 --- a/arch/mips/cobalt/setup.c +++ b/arch/mips/cobalt/setup.c @@ -50,8 +50,8 @@ const char *get_system_type(void) void __init plat_timer_setup(struct irqaction *irq) { - /* Load timer value for 1KHz (TCLK is 50MHz) */ - GALILEO_OUTL(50*1000*1000 / 1000, GT_TC0_OFS); + /* Load timer value for HZ (TCLK is 50MHz) */ + GALILEO_OUTL(50*1000*1000 / HZ, GT_TC0_OFS); /* Enable timer */ GALILEO_OUTL(GALILEO_ENTC0 | GALILEO_SELTC0, GT_TC_CONTROL_OFS); diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index c6a015940b41..ba3bf733d27d 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-rc1 -# Thu Jul 6 10:02:58 2006 +# Linux kernel version: 2.6.19-rc1 +# Wed Oct 11 01:41:41 2006 # CONFIG_MIPS=y @@ -25,8 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_IVR is not set -# CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set # CONFIG_LASAT is not set # CONFIG_MIPS_ATLAS is not set @@ -83,6 +81,7 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_COHERENT=y CONFIG_CPU_BIG_ENDIAN=y @@ -132,8 +131,8 @@ CONFIG_PAGE_SIZE_4KB=y # CONFIG_PAGE_SIZE_64KB is not set # CONFIG_SIBYTE_DMA_PAGEOPS is not set CONFIG_MIPS_MT_DISABLED=y -# CONFIG_MIPS_MT_SMTC is not set # CONFIG_MIPS_MT_SMP is not set +# CONFIG_MIPS_MT_SMTC is not set # CONFIG_MIPS_VPE_LOADER is not set CONFIG_CPU_HAS_LLSC=y CONFIG_CPU_HAS_SYNC=y @@ -185,9 +184,11 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set +# CONFIG_UTS_NS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -195,7 +196,9 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y CONFIG_EMBEDDED=y +# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -204,12 +207,12 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y -CONFIG_RT_MUTEXES=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -228,6 +231,7 @@ CONFIG_STOP_MACHINE=y # # Block layer # +CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set # @@ -249,18 +253,17 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_HW_HAS_PCI=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_MULTITHREAD_PROBE is not set CONFIG_PCI_DEBUG=y CONFIG_MMU=y # # PCCARD (PCMCIA/CardBus) support # -# CONFIG_PCCARD is not set # # PCI Hotplug Support # -# CONFIG_HOTPLUG_PCI is not set # # Executable file formats @@ -271,7 +274,7 @@ CONFIG_BINFMT_ELF=y CONFIG_MIPS32_COMPAT=y CONFIG_COMPAT=y CONFIG_MIPS32_O32=y -# CONFIG_MIPS32_N32 is not set +CONFIG_MIPS32_N32=y CONFIG_BINFMT_ELF32=y # @@ -288,6 +291,7 @@ CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m +# CONFIG_XFRM_SUB_POLICY is not set CONFIG_NET_KEY=y CONFIG_INET=y # CONFIG_IP_MULTICAST is not set @@ -308,10 +312,12 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_TUNNEL is not set CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=y CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_IPV6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set @@ -341,7 +347,6 @@ CONFIG_NETWORK_SECMARK=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -368,7 +373,6 @@ CONFIG_NETWORK_SECMARK=y # CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set # CONFIG_DEBUG_DRIVER is not set # CONFIG_SYS_HYPERVISOR is not set @@ -404,7 +408,7 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_RAM is not set -# CONFIG_BLK_DEV_INITRD is not set +CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set @@ -412,6 +416,7 @@ CONFIG_BLK_DEV_NBD=m # ATA/ATAPI/MFM/RLL support # CONFIG_IDE=y +CONFIG_IDE_MAX_HWIFS=4 CONFIG_BLK_DEV_IDE=y # @@ -429,10 +434,40 @@ CONFIG_BLK_DEV_IDEFLOPPY=y # IDE chipset support/bugfixes # CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_IDEPCI is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +# CONFIG_IDEDMA_PCI_AUTO is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +CONFIG_BLK_DEV_CMD64X=y +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_BLK_DEV_IDE_SWARM is not set # CONFIG_IDE_ARM is not set -# CONFIG_BLK_DEV_IDEDMA is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set # CONFIG_IDEDMA_AUTO is not set # CONFIG_BLK_DEV_HD is not set @@ -441,6 +476,12 @@ CONFIG_IDE_GENERIC=y # # CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set +# CONFIG_SCSI_NETLINK is not set + +# +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +# CONFIG_ATA is not set # # Multi-device support (RAID and LVM) @@ -516,6 +557,7 @@ CONFIG_NET_SB1250_MAC=y # CONFIG_SK98LIN is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set +# CONFIG_QLA3XXX is not set # # Ethernet (10000 Mbit) @@ -650,7 +692,6 @@ CONFIG_I2C_CHARDEV=y # CONFIG_I2C_ALGOBIT is not set # CONFIG_I2C_ALGOPCF is not set # CONFIG_I2C_ALGOPCA is not set -CONFIG_I2C_ALGO_SIBYTE=y # # I2C Hardware Bus support @@ -712,12 +753,12 @@ CONFIG_I2C_DEBUG_CHIP=y # # Misc devices # +# CONFIG_TIFM_CORE is not set # # Multimedia devices # # CONFIG_VIDEO_DEV is not set -CONFIG_VIDEO_V4L2=y # # Digital Video Broadcasting Devices @@ -729,6 +770,7 @@ CONFIG_VIDEO_V4L2=y # # CONFIG_FIRMWARE_EDID is not set # CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Sound @@ -811,6 +853,7 @@ CONFIG_FS_MBCACHE=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -840,8 +883,10 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y -# CONFIG_TMPFS is not set +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set @@ -851,6 +896,7 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -881,7 +927,6 @@ CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set -# CONFIG_CIFS_DEBUG2 is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -899,6 +944,10 @@ CONFIG_MSDOS_PARTITION=y # CONFIG_NLS is not set # +# Distributed Lock Manager +# + +# # Profiling support # # CONFIG_PROFILING is not set @@ -907,7 +956,8 @@ CONFIG_MSDOS_PARTITION=y # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_PRINTK_TIME=y +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_MUST_CHECK=y CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y @@ -920,12 +970,15 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_SPINLOCK is not set CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_RWSEMS is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_FS is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_LIST is not set CONFIG_FORCED_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set CONFIG_CROSSCOMPILE=y @@ -946,6 +999,10 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y # Cryptographic options # CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_MD4=y @@ -955,9 +1012,12 @@ CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_BLOWFISH=y CONFIG_CRYPTO_TWOFISH=y +CONFIG_CRYPTO_TWOFISH_COMMON=y CONFIG_CRYPTO_SERPENT=y CONFIG_CRYPTO_AES=m # CONFIG_CRYPTO_CAST5 is not set diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig new file mode 100644 index 000000000000..382083ebea0a --- /dev/null +++ b/arch/mips/configs/jazz_defconfig @@ -0,0 +1,1404 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.19-rc1 +# Sun Oct 8 19:03:07 2006 +# +CONFIG_MIPS=y + +# +# Machine selection +# +# CONFIG_MIPS_MTX1 is not set +# CONFIG_MIPS_BOSPORUS is not set +# CONFIG_MIPS_PB1000 is not set +# CONFIG_MIPS_PB1100 is not set +# CONFIG_MIPS_PB1500 is not set +# CONFIG_MIPS_PB1550 is not set +# CONFIG_MIPS_PB1200 is not set +# CONFIG_MIPS_DB1000 is not set +# CONFIG_MIPS_DB1100 is not set +# CONFIG_MIPS_DB1500 is not set +# CONFIG_MIPS_DB1550 is not set +# CONFIG_MIPS_DB1200 is not set +# CONFIG_MIPS_MIRAGE is not set +# CONFIG_BASLER_EXCITE is not set +# CONFIG_MIPS_COBALT is not set +# CONFIG_MACH_DECSTATION is not set +# CONFIG_MIPS_EV64120 is not set +CONFIG_MACH_JAZZ=y +# CONFIG_LASAT is not set +# CONFIG_MIPS_ATLAS is not set +# CONFIG_MIPS_MALTA is not set +# CONFIG_MIPS_SEAD is not set +# CONFIG_WR_PPMC is not set +# CONFIG_MIPS_SIM is not set +# CONFIG_MOMENCO_JAGUAR_ATX is not set +# CONFIG_MOMENCO_OCELOT is not set +# CONFIG_MOMENCO_OCELOT_3 is not set +# CONFIG_MOMENCO_OCELOT_C is not set +# CONFIG_MOMENCO_OCELOT_G is not set +# CONFIG_MIPS_XXS1500 is not set +# CONFIG_PNX8550_V2PCI is not set +# CONFIG_PNX8550_JBS is not set +# CONFIG_DDB5477 is not set +# CONFIG_MACH_VR41XX is not set +# CONFIG_PMC_YOSEMITE is not set +# CONFIG_QEMU is not set +# CONFIG_MARKEINS is not set +# CONFIG_SGI_IP22 is not set +# CONFIG_SGI_IP27 is not set +# CONFIG_SGI_IP32 is not set +# CONFIG_SIBYTE_BIGSUR is not set +# CONFIG_SIBYTE_SWARM is not set +# CONFIG_SIBYTE_SENTOSA is not set +# CONFIG_SIBYTE_RHONE is not set +# CONFIG_SIBYTE_CARMEL is not set +# CONFIG_SIBYTE_PTSWARM is not set +# CONFIG_SIBYTE_LITTLESUR is not set +# CONFIG_SIBYTE_CRHINE is not set +# CONFIG_SIBYTE_CRHONE is not set +# CONFIG_SNI_RM200_PCI is not set +# CONFIG_TOSHIBA_JMR3927 is not set +# CONFIG_TOSHIBA_RBTX4927 is not set +# CONFIG_TOSHIBA_RBTX4938 is not set +# CONFIG_ACER_PICA_61 is not set +# CONFIG_MIPS_MAGNUM_4000 is not set +CONFIG_OLIVETTI_M700=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARC=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_DMA_NONCOHERENT=y +CONFIG_DMA_NEED_PCI_MAP_STATE=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_I8259=y +# CONFIG_CPU_BIG_ENDIAN is not set +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_SYS_SUPPORTS_LITTLE_ENDIAN=y +CONFIG_ARC32=y +CONFIG_MIPS_L1_CACHE_SHIFT=5 +CONFIG_ARC_MEMORY=y +CONFIG_ARC_PROMLIB=y + +# +# CPU selection +# +# CONFIG_CPU_MIPS32_R1 is not set +# CONFIG_CPU_MIPS32_R2 is not set +# CONFIG_CPU_MIPS64_R1 is not set +# CONFIG_CPU_MIPS64_R2 is not set +# CONFIG_CPU_R3000 is not set +# CONFIG_CPU_TX39XX is not set +# CONFIG_CPU_VR41XX is not set +# CONFIG_CPU_R4300 is not set +CONFIG_CPU_R4X00=y +# CONFIG_CPU_TX49XX is not set +# CONFIG_CPU_R5000 is not set +# CONFIG_CPU_R5432 is not set +# CONFIG_CPU_R6000 is not set +# CONFIG_CPU_NEVADA is not set +# CONFIG_CPU_R8000 is not set +# CONFIG_CPU_R10000 is not set +# CONFIG_CPU_RM7000 is not set +# CONFIG_CPU_RM9000 is not set +# CONFIG_CPU_SB1 is not set +CONFIG_SYS_HAS_CPU_R4X00=y +CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y +CONFIG_SYS_SUPPORTS_64BIT_KERNEL=y +CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y +CONFIG_CPU_SUPPORTS_64BIT_KERNEL=y + +# +# Kernel type +# +CONFIG_32BIT=y +# CONFIG_64BIT is not set +CONFIG_PAGE_SIZE_4KB=y +# CONFIG_PAGE_SIZE_8KB is not set +# CONFIG_PAGE_SIZE_16KB is not set +# CONFIG_PAGE_SIZE_64KB is not set +CONFIG_MIPS_MT_DISABLED=y +# CONFIG_MIPS_MT_SMP is not set +# CONFIG_MIPS_MT_SMTC is not set +# CONFIG_MIPS_VPE_LOADER is not set +# CONFIG_64BIT_PHYS_ADDR is not set +CONFIG_CPU_HAS_LLSC=y +CONFIG_CPU_HAS_SYNC=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +# CONFIG_HZ_48 is not set +CONFIG_HZ_100=y +# CONFIG_HZ_128 is not set +# CONFIG_HZ_250 is not set +# CONFIG_HZ_256 is not set +# CONFIG_HZ_1000 is not set +# CONFIG_HZ_1024 is not set +CONFIG_SYS_SUPPORTS_100HZ=y +CONFIG_HZ=100 +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +# CONFIG_TASKSTATS is not set +# CONFIG_UTS_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_RELAY=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_EMBEDDED=y +# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SHMEM=y +CONFIG_SLAB=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +# CONFIG_SLOB is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_MODVERSIONS=y +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y + +# +# Block layer +# +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Bus options (PCI, PCMCIA, EISA, ISA, TC) +# +CONFIG_ISA=y +CONFIG_MMU=y +CONFIG_I8253=y + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_TRAD_SIGNALS=y + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NETDEBUG is not set +CONFIG_PACKET=m +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" + +# +# IP: Virtual Server Configuration +# +# CONFIG_IP_VS is not set +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_TUNNEL=m +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +# CONFIG_NETFILTER_XT_TARGET_CONNSECMARK is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +# CONFIG_IP_NF_CT_ACCT is not set +CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_CONNTRACK_SECMARK=y +CONFIG_IP_NF_CONNTRACK_EVENTS=y +CONFIG_IP_NF_CONNTRACK_NETLINK=m +CONFIG_IP_NF_CT_PROTO_SCTP=m +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_PPTP=m +CONFIG_IP_NF_H323=m +CONFIG_IP_NF_SIP=m +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_MATCH_HASHLIMIT=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +CONFIG_IP_NF_NAT_SNMP_BASIC=m +CONFIG_IP_NF_NAT_IRC=m +CONFIG_IP_NF_NAT_FTP=m +CONFIG_IP_NF_NAT_TFTP=m +CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_PPTP=m +CONFIG_IP_NF_NAT_H323=m +CONFIG_IP_NF_NAT_SIP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# IPv6: Netfilter Configuration (EXPERIMENTAL) +# +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_RAW=m + +# +# DECnet: Netfilter Configuration +# +CONFIG_DECNET_NF_GRABULATOR=m + +# +# Bridge: Netfilter Configuration +# +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_BRIDGE_EBT_ULOG=m + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +CONFIG_BRIDGE=m +# CONFIG_VLAN_8021Q is not set +CONFIG_DECNET=m +# CONFIG_DECNET_ROUTER is not set +CONFIG_LLC=m +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CLK_JIFFIES=y +# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set +# CONFIG_NET_SCH_CLK_CPU is not set + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +# CONFIG_CLS_U32_PERF is not set +# CONFIG_CLS_U32_MARK is not set +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +# CONFIG_NET_EMATCH is not set +# CONFIG_NET_CLS_ACT is not set +CONFIG_NET_CLS_POLICE=y +# CONFIG_NET_CLS_IND is not set +CONFIG_NET_ESTIMATOR=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +CONFIG_AX25=m +CONFIG_AX25_DAMA_SLAVE=y +CONFIG_NETROM=m +CONFIG_ROSE=m + +# +# AX.25 network device drivers +# +CONFIG_MKISS=m +CONFIG_6PACK=m +CONFIG_BPQETHER=m +# CONFIG_BAYCOM_SER_FDX is not set +# CONFIG_BAYCOM_SER_HDX is not set +# CONFIG_BAYCOM_PAR is not set +# CONFIG_BAYCOM_EPP is not set +# CONFIG_YAM is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_IEEE80211=m +# CONFIG_IEEE80211_DEBUG is not set +CONFIG_IEEE80211_CRYPT_WEP=m +CONFIG_IEEE80211_CRYPT_CCMP=m +CONFIG_IEEE80211_SOFTMAC=m +# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set +CONFIG_WIRELESS_EXT=y + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +CONFIG_CONNECTOR=m + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +# CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_AX88796 is not set +CONFIG_PARPORT_1284=y + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +CONFIG_BLK_DEV_FD=m +CONFIG_PARIDE=m +CONFIG_PARIDE_PARPORT=m + +# +# Parallel IDE high-level drivers +# +CONFIG_PARIDE_PD=m +CONFIG_PARIDE_PCD=m +CONFIG_PARIDE_PF=m +CONFIG_PARIDE_PT=m +CONFIG_PARIDE_PG=m + +# +# Parallel IDE protocol modules +# +CONFIG_PARIDE_ATEN=m +CONFIG_PARIDE_BPCK=m +CONFIG_PARIDE_BPCK6=m +CONFIG_PARIDE_COMM=m +CONFIG_PARIDE_DSTR=m +CONFIG_PARIDE_FIT2=m +CONFIG_PARIDE_FIT3=m +CONFIG_PARIDE_EPAT=m +# CONFIG_PARIDE_EPATC8 is not set +CONFIG_PARIDE_EPIA=m +CONFIG_PARIDE_FRIQ=m +CONFIG_PARIDE_FRPW=m +CONFIG_PARIDE_KBIC=m +CONFIG_PARIDE_KTTI=m +CONFIG_PARIDE_ON20=m +CONFIG_PARIDE_ON26=m +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_BLK_DEV_INITRD is not set +CONFIG_CDROM_PKTCDVD=m +CONFIG_CDROM_PKTCDVD_BUFFERS=8 +# CONFIG_CDROM_PKTCDVD_WCACHE is not set +CONFIG_ATA_OVER_ETH=m + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_NETLINK=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +CONFIG_BLK_DEV_SR_VENDOR=y +# CONFIG_CHR_DEV_SG is not set +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI Transports +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=m +CONFIG_SCSI_SAS_ATTRS=m +# CONFIG_SCSI_SAS_LIBSAS is not set + +# +# SCSI low-level drivers +# +CONFIG_ISCSI_TCP=m +# CONFIG_SCSI_AHA152X is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_IN2000 is not set +# CONFIG_SCSI_DTC3280 is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GENERIC_NCR5380 is not set +# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set +CONFIG_SCSI_PPA=m +CONFIG_SCSI_IMM=m +# CONFIG_SCSI_IZIP_EPP16 is not set +# CONFIG_SCSI_IZIP_SLOW_CTR is not set +# CONFIG_SCSI_NCR53C406A is not set +# CONFIG_SCSI_PAS16 is not set +# CONFIG_SCSI_PSI240I is not set +# CONFIG_SCSI_QLOGIC_FAS is not set +# CONFIG_SCSI_SYM53C416 is not set +# CONFIG_SCSI_T128 is not set +# CONFIG_SCSI_DEBUG is not set +CONFIG_JAZZ_ESP=y + +# +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +# CONFIG_ATA is not set + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_RAID5_RESHAPE=y +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +# CONFIG_DM_DEBUG is not set +# CONFIG_DM_CRYPT is not set +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_EMC=m + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# + +# +# I2O device support +# + +# +# Network device support +# +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_BONDING=m +CONFIG_EQUALIZER=m +CONFIG_TUN=m + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# PHY device support +# +CONFIG_PHYLIB=m + +# +# MII PHY device drivers +# +CONFIG_MARVELL_PHY=m +CONFIG_DAVICOM_PHY=m +CONFIG_QSEMI_PHY=m +CONFIG_LXT_PHY=m +CONFIG_CICADA_PHY=m +CONFIG_VITESSE_PHY=m +CONFIG_SMSC_PHY=m +# CONFIG_FIXED_PHY is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +CONFIG_MIPS_JAZZ_SONIC=y +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_DM9000 is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +CONFIG_NET_ISA=y +# CONFIG_E2100 is not set +# CONFIG_EWRK3 is not set +# CONFIG_EEXPRESS is not set +# CONFIG_EEXPRESS_PRO is not set +# CONFIG_HPLAN_PLUS is not set +# CONFIG_HPLAN is not set +# CONFIG_LP486E is not set +# CONFIG_ETH16I is not set +CONFIG_NE2000=m +# CONFIG_SEEQ8005 is not set +CONFIG_NET_PCI=y +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_CS89x0 is not set +# CONFIG_LAN_SAA9730 is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 Mbit) +# + +# +# Ethernet (10000 Mbit) +# + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +CONFIG_PLIP=m +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +CONFIG_INPUT_FF_MEMLESS=m + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_INPORT is not set +# CONFIG_MOUSE_LOGIBM is not set +# CONFIG_MOUSE_PC110PAD is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=y +CONFIG_SERIO_PARKBD=m +CONFIG_SERIO_LIBPS2=y +CONFIG_SERIO_RAW=m +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=m +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +# CONFIG_SERIAL_8250_MANY_PORTS is not set +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=m +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_PRINTER=m +# CONFIG_LP_CONSOLE is not set +CONFIG_PPDEV=m +CONFIG_TIPAR=m + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +CONFIG_RTC=m +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set + +# +# Dallas's 1-wire bus +# +CONFIG_W1=m +CONFIG_W1_CON=y + +# +# 1-wire Bus Masters +# + +# +# 1-wire Slaves +# +# CONFIG_W1_SLAVE_THERM is not set +# CONFIG_W1_SLAVE_SMEM is not set +# CONFIG_W1_SLAVE_DS2433 is not set + +# +# Hardware Monitoring support +# +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set + +# +# Misc devices +# +# CONFIG_TIFM_CORE is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB_ARCH_HAS_HCD is not set +# CONFIG_USB_ARCH_HAS_OHCI is not set +# CONFIG_USB_ARCH_HAS_EHCI is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# + +# +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + +# +# File systems +# +CONFIG_EXT2_FS=m +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +# CONFIG_EXT3_FS_POSIX_ACL is not set +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_SECURITY=y +# CONFIG_XFS_POSIX_ACL is not set +# CONFIG_XFS_RT is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +CONFIG_MINIX_FS=m +CONFIG_ROMFS_FS=m +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_QUOTACTL=y +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_ZISOFS_FS=m +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +CONFIG_ADFS_FS=m +# CONFIG_ADFS_FS_RW is not set +CONFIG_AFFS_FS=m +# CONFIG_ECRYPT_FS is not set +CONFIG_HFS_FS=m +# CONFIG_HFSPLUS_FS is not set +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set +CONFIG_BFS_FS=m +CONFIG_EFS_FS=m +CONFIG_CRAMFS=m +CONFIG_VXFS_FS=m +CONFIG_HPFS_FS=m +CONFIG_QNX4FS_FS=m +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set +# CONFIG_UFS_DEBUG is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +CONFIG_RPCSEC_GSS_SPKM3=m +CONFIG_SMB_FS=m +# CONFIG_SMB_NLS_DEFAULT is not set +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +CONFIG_NCP_FS=m +CONFIG_NCPFS_PACKET_SIGNING=y +CONFIG_NCPFS_IOCTL_LOCKING=y +CONFIG_NCPFS_STRONG=y +CONFIG_NCPFS_NFS_NS=y +CONFIG_NCPFS_OS2_NS=y +CONFIG_NCPFS_SMALLDOS=y +CONFIG_NCPFS_NLS=y +CONFIG_NCPFS_EXTRAS=y +CONFIG_CODA_FS=m +CONFIG_CODA_FS_OLD_API=y +CONFIG_AFS_FS=m +CONFIG_RXRPC=m +# CONFIG_9P_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +# CONFIG_EFI_PARTITION is not set + +# +# Native Language Support +# +CONFIG_NLS=m +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Distributed Lock Manager +# + +# +# Profiling support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_MUST_CHECK=y +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_KERNEL is not set +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_DEBUG_FS is not set +CONFIG_CROSSCOMPILE=y +CONFIG_CMDLINE="" + +# +# Security options +# +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=m +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=m +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_CRC32C=m +# CONFIG_CRYPTO_TEST is not set + +# +# Hardware crypto devices +# + +# +# Library routines +# +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC32=y +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index aeefe2873e38..101e80347dce 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-rc1 -# Thu Jul 6 10:04:13 2006 +# Linux kernel version: 2.6.19-rc1 +# Fri Oct 6 17:34:55 2006 # CONFIG_MIPS=y @@ -25,8 +25,6 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set -# CONFIG_MIPS_IVR is not set -# CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set # CONFIG_LASAT is not set # CONFIG_MIPS_ATLAS is not set @@ -67,6 +65,7 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMA_NONCOHERENT=y @@ -134,19 +133,19 @@ CONFIG_MIPS_CPU_SCACHE=y CONFIG_CPU_HAS_PREFETCH=y # CONFIG_MIPS_MT_DISABLED is not set # CONFIG_MIPS_MT_SMTC is not set -# CONFIG_MIPS_MT_SMP is not set -CONFIG_MIPS_VPE_LOADER=y +CONFIG_MIPS_MT_SMP=y +# CONFIG_MIPS_VPE_LOADER is not set CONFIG_MIPS_MT=y CONFIG_SYS_SUPPORTS_MULTITHREADING=y CONFIG_MIPS_MT_FPAFF=y -CONFIG_MIPS_VPE_LOADER_TOM=y -CONFIG_MIPS_VPE_APSP_API=y -CONFIG_MIPS_APSP_KSPD=y # CONFIG_64BIT_PHYS_ADDR is not set CONFIG_CPU_HAS_LLSC=y +CONFIG_CPU_MIPSR2_IRQ_VI=y +CONFIG_CPU_MIPSR2_SRS=y CONFIG_CPU_HAS_SYNC=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_IRQ_PER_CPU=y CONFIG_CPU_SUPPORTS_HIGHMEM=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y @@ -158,6 +157,9 @@ CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_RESOURCES_64BIT is not set +CONFIG_SMP=y +CONFIG_SYS_SUPPORTS_SMP=y +CONFIG_NR_CPUS=2 # CONFIG_HZ_48 is not set CONFIG_HZ_100=y # CONFIG_HZ_128 is not set @@ -170,6 +172,7 @@ CONFIG_HZ=100 CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -178,7 +181,7 @@ CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # Code maturity level options # CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 # @@ -188,15 +191,20 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set +# CONFIG_UTS_NS is not set # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set +# CONFIG_CPUSETS is not set CONFIG_RELAY=y CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y CONFIG_EMBEDDED=y +# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y @@ -204,12 +212,12 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y -CONFIG_RT_MUTEXES=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -223,10 +231,12 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y # # Block layer # +CONFIG_BLOCK=y # CONFIG_LBD is not set # CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_LSF is not set @@ -249,6 +259,7 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # CONFIG_HW_HAS_PCI=y CONFIG_PCI=y +# CONFIG_PCI_MULTITHREAD_PROBE is not set CONFIG_MMU=y # @@ -282,6 +293,7 @@ CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m +# CONFIG_XFRM_SUB_POLICY is not set CONFIG_NET_KEY=y CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -313,10 +325,12 @@ CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=y CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" # # IP: Virtual Server Configuration @@ -358,11 +372,16 @@ CONFIG_IPV6_ROUTE_INFO=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set CONFIG_INET6_XFRM_TUNNEL=m CONFIG_INET6_TUNNEL=m CONFIG_INET6_XFRM_MODE_TRANSPORT=m CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_TUNNEL=m +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set CONFIG_NETWORK_SECMARK=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set @@ -377,6 +396,7 @@ CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_NOTRACK=m @@ -387,6 +407,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_DCCP=m +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HELPER=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m @@ -429,7 +450,6 @@ CONFIG_IP_NF_MATCH_IPRANGE=m CONFIG_IP_NF_MATCH_TOS=m CONFIG_IP_NF_MATCH_RECENT=m CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_MATCH_OWNER=m @@ -457,7 +477,6 @@ CONFIG_IP_NF_NAT_SIP=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -536,13 +555,12 @@ CONFIG_LLC=m # CONFIG_LLC2 is not set # CONFIG_IPX is not set CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=y +CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m CONFIG_IPDDP_ENCAP=y CONFIG_IPDDP_DECAP=y # CONFIG_X25 is not set # CONFIG_LAPB is not set -CONFIG_NET_DIVERT=y # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -604,6 +622,7 @@ CONFIG_IEEE80211_CRYPT_CCMP=m CONFIG_IEEE80211_SOFTMAC=m # CONFIG_IEEE80211_SOFTMAC_DEBUG is not set CONFIG_WIRELESS_EXT=y +CONFIG_FIB_RULES=y # # Device Drivers @@ -652,6 +671,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # CONFIG_BLK_DEV_INITRD is not set CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 @@ -662,6 +682,7 @@ CONFIG_ATA_OVER_ETH=m # ATA/ATAPI/MFM/RLL support # CONFIG_IDE=y +CONFIG_IDE_MAX_HWIFS=4 CONFIG_BLK_DEV_IDE=y # @@ -699,6 +720,7 @@ CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_BLK_DEV_CS5530 is not set # CONFIG_BLK_DEV_HPT34X is not set # CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set # CONFIG_BLK_DEV_SC1200 is not set CONFIG_BLK_DEV_PIIX=y # CONFIG_BLK_DEV_IT821X is not set @@ -721,6 +743,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -742,12 +765,13 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y # -# SCSI Transport Attributes +# SCSI Transports # CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m +# CONFIG_SCSI_SAS_LIBSAS is not set # # SCSI low-level drivers @@ -765,21 +789,23 @@ CONFIG_AIC7XXX_DEBUG_MASK=0 CONFIG_AIC7XXX_REG_PRETTY_PRINT=y # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set # CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ARCMSR is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_SATA is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_IPS is not set # CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set # CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set # CONFIG_SCSI_QLOGIC_1280 is not set # CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set # CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set @@ -787,6 +813,11 @@ CONFIG_AIC7XXX_REG_PRETTY_PRINT=y # CONFIG_SCSI_DEBUG is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +# CONFIG_ATA is not set + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -800,6 +831,7 @@ CONFIG_MD_RAID5_RESHAPE=y CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m +# CONFIG_DM_DEBUG is not set CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m @@ -854,6 +886,7 @@ CONFIG_LXT_PHY=m CONFIG_CICADA_PHY=m CONFIG_VITESSE_PHY=m CONFIG_SMSC_PHY=m +# CONFIG_FIXED_PHY is not set # # Ethernet (10 or 100Mbit) @@ -873,6 +906,7 @@ CONFIG_MII=y # CONFIG_HP100 is not set CONFIG_NET_PCI=y CONFIG_PCNET32=y +# CONFIG_PCNET32_NAPI is not set # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set @@ -909,6 +943,7 @@ CONFIG_PCNET32=y # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set +# CONFIG_QLA3XXX is not set # # Ethernet (10000 Mbit) @@ -956,6 +991,7 @@ CONFIG_PCNET32=y # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set # # Userland interfaces @@ -1070,12 +1106,12 @@ CONFIG_RTC=y # # Misc devices # +# CONFIG_TIFM_CORE is not set # # Multimedia devices # # CONFIG_VIDEO_DEV is not set -CONFIG_VIDEO_V4L2=y # # Digital Video Broadcasting Devices @@ -1093,6 +1129,7 @@ CONFIG_VIDEO_V4L2=y # # CONFIG_VGA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Sound @@ -1191,6 +1228,7 @@ CONFIG_XFS_QUOTA=y CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set +# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set CONFIG_MINIX_FS=m CONFIG_ROMFS_FS=m @@ -1230,8 +1268,10 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set @@ -1279,7 +1319,6 @@ CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set -# CONFIG_CIFS_DEBUG2 is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -1336,6 +1375,11 @@ CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m # +# Distributed Lock Manager +# +# CONFIG_DLM is not set + +# # Profiling support # # CONFIG_PROFILING is not set @@ -1345,10 +1389,11 @@ CONFIG_NLS_UTF8=m # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_MUST_CHECK=y # CONFIG_MAGIC_SYSRQ is not set # CONFIG_UNUSED_SYMBOLS is not set # CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +CONFIG_LOG_BUF_SHIFT=15 # CONFIG_DEBUG_FS is not set CONFIG_CROSSCOMPILE=y CONFIG_CMDLINE="" @@ -1363,6 +1408,10 @@ CONFIG_CMDLINE="" # Cryptographic options # CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_MD4=m @@ -1372,9 +1421,12 @@ CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_CAST5=m diff --git a/arch/mips/configs/pb1100_defconfig b/arch/mips/configs/pb1100_defconfig index 741f8258075c..9e672f63a0aa 100644 --- a/arch/mips/configs/pb1100_defconfig +++ b/arch/mips/configs/pb1100_defconfig @@ -76,7 +76,6 @@ CONFIG_SYS_SUPPORTS_LITTLE_ENDIAN=y CONFIG_SOC_AU1100=y CONFIG_SOC_AU1X00=y CONFIG_SWAP_IO_SPACE=y -# CONFIG_AU1X00_USB_DEVICE is not set CONFIG_MIPS_L1_CACHE_SHIFT=5 # diff --git a/arch/mips/configs/pb1500_defconfig b/arch/mips/configs/pb1500_defconfig index 8576340714da..d0c0f4af1bff 100644 --- a/arch/mips/configs/pb1500_defconfig +++ b/arch/mips/configs/pb1500_defconfig @@ -75,7 +75,6 @@ CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SYS_SUPPORTS_LITTLE_ENDIAN=y CONFIG_SOC_AU1500=y CONFIG_SOC_AU1X00=y -# CONFIG_AU1X00_USB_DEVICE is not set CONFIG_MIPS_L1_CACHE_SHIFT=5 # diff --git a/arch/mips/ddb5xxx/ddb5477/irq.c b/arch/mips/ddb5xxx/ddb5477/irq.c index 513fc6722d84..a8bd2e66705c 100644 --- a/arch/mips/ddb5xxx/ddb5477/irq.c +++ b/arch/mips/ddb5xxx/ddb5477/irq.c @@ -153,8 +153,7 @@ u8 i8259_interrupt_ack(void) * the first level int-handler will jump here if it is a vrc5477 irq */ #define NUM_5477_IRQS 32 -static void -vrc5477_irq_dispatch(struct pt_regs *regs) +static void vrc5477_irq_dispatch(void) { u32 intStatus; u32 bitmask; @@ -178,7 +177,7 @@ vrc5477_irq_dispatch(struct pt_regs *regs) /* check for i8259 interrupts */ if (intStatus & (1 << VRC5477_I8259_CASCADE)) { int i8259_irq = i8259_interrupt_ack(); - do_IRQ(I8259_IRQ_BASE + i8259_irq, regs); + do_IRQ(I8259_IRQ_BASE + i8259_irq); return; } } @@ -186,7 +185,7 @@ vrc5477_irq_dispatch(struct pt_regs *regs) for (i=0, bitmask=1; i<= NUM_5477_IRQS; bitmask <<=1, i++) { /* do we need to "and" with the int mask? */ if (intStatus & bitmask) { - do_IRQ(VRC5477_IRQ_BASE + i, regs); + do_IRQ(VRC5477_IRQ_BASE + i); return; } } @@ -194,18 +193,18 @@ vrc5477_irq_dispatch(struct pt_regs *regs) #define VR5477INTS (STATUSF_IP2|STATUSF_IP3|STATUSF_IP4|STATUSF_IP5|STATUSF_IP6) -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status(); if (pending & STATUSF_IP7) - do_IRQ(CPU_IRQ_BASE + 7, regs); + do_IRQ(CPU_IRQ_BASE + 7); else if (pending & VR5477INTS) - vrc5477_irq_dispatch(regs); + vrc5477_irq_dispatch(); else if (pending & STATUSF_IP0) - do_IRQ(CPU_IRQ_BASE, regs); + do_IRQ(CPU_IRQ_BASE); else if (pending & STATUSF_IP1) - do_IRQ(CPU_IRQ_BASE + 1, regs); + do_IRQ(CPU_IRQ_BASE + 1); else - spurious_interrupt(regs); + spurious_interrupt(); } diff --git a/arch/mips/dec/ecc-berr.c b/arch/mips/dec/ecc-berr.c index cc24c5ed0c05..3e374d05978f 100644 --- a/arch/mips/dec/ecc-berr.c +++ b/arch/mips/dec/ecc-berr.c @@ -24,6 +24,7 @@ #include <asm/addrspace.h> #include <asm/bootinfo.h> #include <asm/cpu.h> +#include <asm/irq_regs.h> #include <asm/processor.h> #include <asm/system.h> #include <asm/traps.h> @@ -200,8 +201,10 @@ int dec_ecc_be_handler(struct pt_regs *regs, int is_fixup) return dec_ecc_be_backend(regs, is_fixup, 0); } -irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id) { + struct pt_regs *regs = get_irq_regs(); + int action = dec_ecc_be_backend(regs, 0, 1); if (action == MIPS_BE_DISCARD) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 455a65b91cb0..31dd47d1002d 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -264,10 +264,10 @@ srlv t3,t1,t2 handle_it: - jal do_IRQ - move a1,sp - - j ret_from_irq + LONG_L s0, TI_REGS($28) + LONG_S sp, TI_REGS($28) + PTR_LA ra, ret_from_irq + j do_IRQ nop #ifdef CONFIG_32BIT @@ -277,9 +277,8 @@ fpu: #endif spurious: - jal spurious_interrupt - nop - j ret_from_irq + PTR_LA ra, _ret_from_irq + j spurious_interrupt nop END(plat_irq_dispatch) diff --git a/arch/mips/dec/kn01-berr.c b/arch/mips/dec/kn01-berr.c index b9271db9bc76..f19b4617a0a6 100644 --- a/arch/mips/dec/kn01-berr.c +++ b/arch/mips/dec/kn01-berr.c @@ -150,10 +150,10 @@ int dec_kn01_be_handler(struct pt_regs *regs, int is_fixup) return dec_kn01_be_backend(regs, is_fixup, 0); } -irqreturn_t dec_kn01_be_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +irqreturn_t dec_kn01_be_interrupt(int irq, void *dev_id) { volatile u16 *csr = (void *)CKSEG1ADDR(KN01_SLOT_BASE + KN01_CSR); + struct pt_regs *regs = get_irq_regs(); int action; if (!(*csr & KN01_CSR_MEMERR)) diff --git a/arch/mips/dec/kn02xa-berr.c b/arch/mips/dec/kn02xa-berr.c index 6cd3f94f79fe..7a053aadcd3a 100644 --- a/arch/mips/dec/kn02xa-berr.c +++ b/arch/mips/dec/kn02xa-berr.c @@ -21,6 +21,8 @@ #include <linux/types.h> #include <asm/addrspace.h> +#include <asm/irq_regs.h> +#include <asm/ptrace.h> #include <asm/system.h> #include <asm/traps.h> @@ -104,9 +106,9 @@ int dec_kn02xa_be_handler(struct pt_regs *regs, int is_fixup) return dec_kn02xa_be_backend(regs, is_fixup, 0); } -irqreturn_t dec_kn02xa_be_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +irqreturn_t dec_kn02xa_be_interrupt(int irq, void *dev_id) { + struct pt_regs *regs = get_irq_regs(); int action = dec_kn02xa_be_backend(regs, 0, 1); if (action == MIPS_BE_DISCARD) diff --git a/arch/mips/dec/reset.c b/arch/mips/dec/reset.c index f78c6da47921..56397227adb0 100644 --- a/arch/mips/dec/reset.c +++ b/arch/mips/dec/reset.c @@ -8,7 +8,6 @@ #include <linux/linkage.h> #include <asm/addrspace.h> -#include <asm/ptrace.h> typedef void ATTRIB_NORET (* noret_func_t)(void); @@ -35,7 +34,7 @@ void ATTRIB_NORET dec_machine_power_off(void) back_to_prom(); } -irqreturn_t dec_intr_halt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t dec_intr_halt(int irq, void *dev_id) { dec_machine_halt(); } diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index d43241c2f541..6b7481e97bec 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -46,7 +46,7 @@ extern void dec_machine_restart(char *command); extern void dec_machine_halt(void); extern void dec_machine_power_off(void); -extern irqreturn_t dec_intr_halt(int irq, void *dev_id, struct pt_regs *regs); +extern irqreturn_t dec_intr_halt(int irq, void *dev_id); unsigned long dec_kn_slot_base, dec_kn_slot_size; diff --git a/arch/mips/emma2rh/common/irq.c b/arch/mips/emma2rh/common/irq.c index 3af57693c84c..c191b3e9d9d9 100644 --- a/arch/mips/emma2rh/common/irq.c +++ b/arch/mips/emma2rh/common/irq.c @@ -39,7 +39,7 @@ /* * the first level int-handler will jump here if it is a emma2rh irq */ -asmlinkage void emma2rh_irq_dispatch(struct pt_regs *regs) +void emma2rh_irq_dispatch(void) { u32 intStatus; u32 bitmask; @@ -56,7 +56,7 @@ asmlinkage void emma2rh_irq_dispatch(struct pt_regs *regs) & emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN); for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { if (swIntStatus & bitmask) { - do_IRQ(EMMA2RH_SW_IRQ_BASE + i, regs); + do_IRQ(EMMA2RH_SW_IRQ_BASE + i); return; } } @@ -65,7 +65,7 @@ asmlinkage void emma2rh_irq_dispatch(struct pt_regs *regs) for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i, regs); + do_IRQ(EMMA2RH_IRQ_BASE + i); return; } } @@ -81,7 +81,7 @@ asmlinkage void emma2rh_irq_dispatch(struct pt_regs *regs) & emma2rh_in32(EMMA2RH_GPIO_INT_MASK); for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { if (gpioIntStatus & bitmask) { - do_IRQ(EMMA2RH_GPIO_IRQ_BASE + i, regs); + do_IRQ(EMMA2RH_GPIO_IRQ_BASE + i); return; } } @@ -90,7 +90,7 @@ asmlinkage void emma2rh_irq_dispatch(struct pt_regs *regs) for (i = 32, bitmask = 1; i < 64; i++, bitmask <<= 1) { if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i, regs); + do_IRQ(EMMA2RH_IRQ_BASE + i); return; } } @@ -100,7 +100,7 @@ asmlinkage void emma2rh_irq_dispatch(struct pt_regs *regs) for (i = 64, bitmask = 1; i < 96; i++, bitmask <<= 1) { if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i, regs); + do_IRQ(EMMA2RH_IRQ_BASE + i); return; } } diff --git a/arch/mips/emma2rh/markeins/irq.c b/arch/mips/emma2rh/markeins/irq.c index 2a736be42c8c..c93369cb4115 100644 --- a/arch/mips/emma2rh/markeins/irq.c +++ b/arch/mips/emma2rh/markeins/irq.c @@ -57,7 +57,7 @@ extern void emma2rh_sw_irq_init(u32 base); extern void emma2rh_gpio_irq_init(u32 base); extern void emma2rh_irq_init(u32 base); -extern asmlinkage void emma2rh_irq_dispatch(struct pt_regs *regs); +extern void emma2rh_irq_dispatch(void); static struct irqaction irq_cascade = { .handler = no_action, @@ -114,20 +114,20 @@ void __init arch_init_irq(void) setup_irq(CPU_IRQ_BASE + CPU_EMMA2RH_CASCADE, &irq_cascade); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); if (pending & STATUSF_IP7) - do_IRQ(CPU_IRQ_BASE + 7, regs); + do_IRQ(CPU_IRQ_BASE + 7); else if (pending & STATUSF_IP2) - emma2rh_irq_dispatch(regs); + emma2rh_irq_dispatch(); else if (pending & STATUSF_IP1) - do_IRQ(CPU_IRQ_BASE + 1, regs); + do_IRQ(CPU_IRQ_BASE + 1); else if (pending & STATUSF_IP0) - do_IRQ(CPU_IRQ_BASE + 0, regs); + do_IRQ(CPU_IRQ_BASE + 0); else - spurious_interrupt(regs); + spurious_interrupt(); } diff --git a/arch/mips/gt64120/common/time.c b/arch/mips/gt64120/common/time.c index 7feca49350d1..c83ae6acd601 100644 --- a/arch/mips/gt64120/common/time.c +++ b/arch/mips/gt64120/common/time.c @@ -10,7 +10,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/kernel_stat.h> -#include <asm/ptrace.h> +#include <asm/irq_regs.h> #include <asm/gt64120.h> /* @@ -19,7 +19,7 @@ * differently than other MIPS interrupts. */ -static void gt64120_irq(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t gt64120_irq(int irq, void *dev_id) { unsigned int irq_src, int_high_src, irq_src_mask, int_high_src_mask; int handled = 0; @@ -36,12 +36,14 @@ static void gt64120_irq(int irq, void *dev_id, struct pt_regs *regs) irq_src &= ~0x00000800; do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif } GT_WRITE(GT_INTRCAUSE_OFS, 0); GT_WRITE(GT_HINTRCAUSE_OFS, 0); + + return IRQ_HANDLED; } /* diff --git a/arch/mips/gt64120/ev64120/irq.c b/arch/mips/gt64120/ev64120/irq.c index 5d939ac58f3f..ed4d82b9a24a 100644 --- a/arch/mips/gt64120/ev64120/irq.c +++ b/arch/mips/gt64120/ev64120/irq.c @@ -46,22 +46,22 @@ #include <asm/system.h> #include <asm/gt64120.h> -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); if (pending & STATUSF_IP4) /* int2 hardware line (timer) */ - do_IRQ(4, regs); + do_IRQ(4); else if (pending & STATUSF_IP2) /* int0 hardware line */ - do_IRQ(GT_INTA, regs); + do_IRQ(GT_INTA); else if (pending & STATUSF_IP5) /* int3 hardware line */ - do_IRQ(GT_INTD, regs); + do_IRQ(GT_INTD); else if (pending & STATUSF_IP6) /* int4 hardware line */ - do_IRQ(6, regs); + do_IRQ(6); else if (pending & STATUSF_IP7) /* compare int */ - do_IRQ(7, regs); + do_IRQ(7); else - spurious_interrupt(regs); + spurious_interrupt(); } static void disable_ev64120_irq(unsigned int irq_nr) diff --git a/arch/mips/gt64120/ev64120/setup.c b/arch/mips/gt64120/ev64120/setup.c index 4236da31ecc6..91c2d3f41617 100644 --- a/arch/mips/gt64120/ev64120/setup.c +++ b/arch/mips/gt64120/ev64120/setup.c @@ -42,7 +42,6 @@ #include <asm/irq.h> #include <asm/pci.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/time.h> #include <asm/reboot.h> #include <asm/traps.h> diff --git a/arch/mips/gt64120/momenco_ocelot/irq.c b/arch/mips/gt64120/momenco_ocelot/irq.c index 885f67f32ea3..d9294401ccb0 100644 --- a/arch/mips/gt64120/momenco_ocelot/irq.c +++ b/arch/mips/gt64120/momenco_ocelot/irq.c @@ -48,22 +48,22 @@ #include <asm/mipsregs.h> #include <asm/system.h> -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); if (pending & STATUSF_IP2) /* int0 hardware line */ - do_IRQ(2, regs); + do_IRQ(2); else if (pending & STATUSF_IP3) /* int1 hardware line */ - do_IRQ(3, regs); + do_IRQ(3); else if (pending & STATUSF_IP4) /* int2 hardware line */ - do_IRQ(4, regs); + do_IRQ(4); else if (pending & STATUSF_IP5) /* int3 hardware line */ - do_IRQ(5, regs); + do_IRQ(5); else if (pending & STATUSF_IP6) /* int4 hardware line */ - do_IRQ(6, regs); + do_IRQ(6); else if (pending & STATUSF_IP7) /* cpu timer */ - do_IRQ(7, regs); + do_IRQ(7); else { /* * Now look at the extended interrupts @@ -71,13 +71,13 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) pending = (read_c0_cause() & (read_c0_intcontrol() << 8)) >> 16; if (pending & STATUSF_IP8) /* int6 hardware line */ - do_IRQ(8, regs); + do_IRQ(8); else if (pending & STATUSF_IP9) /* int7 hardware line */ - do_IRQ(9, regs); + do_IRQ(9); else if (pending & STATUSF_IP10) /* int8 hardware line */ - do_IRQ(10, regs); + do_IRQ(10); else if (pending & STATUSF_IP11) /* int9 hardware line */ - do_IRQ(11, regs); + do_IRQ(11); } } diff --git a/arch/mips/gt64120/momenco_ocelot/setup.c b/arch/mips/gt64120/momenco_ocelot/setup.c index 9804642ecf89..0e5bbee2d5b7 100644 --- a/arch/mips/gt64120/momenco_ocelot/setup.c +++ b/arch/mips/gt64120/momenco_ocelot/setup.c @@ -56,7 +56,6 @@ #include <asm/irq.h> #include <asm/pci.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/traps.h> #include <linux/bootmem.h> diff --git a/arch/mips/gt64120/wrppmc/irq.c b/arch/mips/gt64120/wrppmc/irq.c index 8d75a43ce877..eedfc24e1eae 100644 --- a/arch/mips/gt64120/wrppmc/irq.c +++ b/arch/mips/gt64120/wrppmc/irq.c @@ -30,18 +30,18 @@ #include <asm/irq_cpu.h> #include <asm/gt64120.h> -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); if (pending & STATUSF_IP7) - do_IRQ(WRPPMC_MIPS_TIMER_IRQ, regs); /* CPU Compare/Count internal timer */ + do_IRQ(WRPPMC_MIPS_TIMER_IRQ); /* CPU Compare/Count internal timer */ else if (pending & STATUSF_IP6) - do_IRQ(WRPPMC_UART16550_IRQ, regs); /* UART 16550 port */ + do_IRQ(WRPPMC_UART16550_IRQ); /* UART 16550 port */ else if (pending & STATUSF_IP3) - do_IRQ(WRPPMC_PCI_INTA_IRQ, regs); /* PCI INT_A */ + do_IRQ(WRPPMC_PCI_INTA_IRQ); /* PCI INT_A */ else - spurious_interrupt(regs); + spurious_interrupt(); } /** diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c index eef05093deb4..d5bd6b3a0933 100644 --- a/arch/mips/jazz/irq.c +++ b/arch/mips/jazz/irq.c @@ -94,26 +94,26 @@ void __init arch_init_irq(void) change_c0_status(ST0_IM, IE_IRQ4 | IE_IRQ3 | IE_IRQ2 | IE_IRQ1); } -static void loc_call(unsigned int irq, struct pt_regs *regs, unsigned int mask) +static void loc_call(unsigned int irq, unsigned int mask) { r4030_write_reg16(JAZZ_IO_IRQ_ENABLE, r4030_read_reg16(JAZZ_IO_IRQ_ENABLE) & mask); - do_IRQ(irq, regs); + do_IRQ(irq); r4030_write_reg16(JAZZ_IO_IRQ_ENABLE, r4030_read_reg16(JAZZ_IO_IRQ_ENABLE) | mask); } -static void ll_local_dev(struct pt_regs *regs) +static void ll_local_dev(void) { switch (r4030_read_reg32(JAZZ_IO_IRQ_SOURCE)) { case 0: panic("Unimplemented loc_no_irq handler"); break; case 4: - loc_call(JAZZ_PARALLEL_IRQ, regs, JAZZ_IE_PARALLEL); + loc_call(JAZZ_PARALLEL_IRQ, JAZZ_IE_PARALLEL); break; case 8: - loc_call(JAZZ_PARALLEL_IRQ, regs, JAZZ_IE_FLOPPY); + loc_call(JAZZ_PARALLEL_IRQ, JAZZ_IE_FLOPPY); break; case 12: panic("Unimplemented loc_sound handler"); @@ -122,27 +122,27 @@ static void ll_local_dev(struct pt_regs *regs) panic("Unimplemented loc_video handler"); break; case 20: - loc_call(JAZZ_ETHERNET_IRQ, regs, JAZZ_IE_ETHERNET); + loc_call(JAZZ_ETHERNET_IRQ, JAZZ_IE_ETHERNET); break; case 24: - loc_call(JAZZ_SCSI_IRQ, regs, JAZZ_IE_SCSI); + loc_call(JAZZ_SCSI_IRQ, JAZZ_IE_SCSI); break; case 28: - loc_call(JAZZ_KEYBOARD_IRQ, regs, JAZZ_IE_KEYBOARD); + loc_call(JAZZ_KEYBOARD_IRQ, JAZZ_IE_KEYBOARD); break; case 32: - loc_call(JAZZ_MOUSE_IRQ, regs, JAZZ_IE_MOUSE); + loc_call(JAZZ_MOUSE_IRQ, JAZZ_IE_MOUSE); break; case 36: - loc_call(JAZZ_SERIAL1_IRQ, regs, JAZZ_IE_SERIAL1); + loc_call(JAZZ_SERIAL1_IRQ, JAZZ_IE_SERIAL1); break; case 40: - loc_call(JAZZ_SERIAL2_IRQ, regs, JAZZ_IE_SERIAL2); + loc_call(JAZZ_SERIAL2_IRQ, JAZZ_IE_SERIAL2); break; } } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; @@ -150,13 +150,13 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) write_c0_compare(0); else if (pending & IE_IRQ4) { r4030_read_reg32(JAZZ_TIMER_REGISTER); - do_IRQ(JAZZ_TIMER_IRQ, regs); + do_IRQ(JAZZ_TIMER_IRQ); } else if (pending & IE_IRQ3) panic("Unimplemented ISA NMI handler"); else if (pending & IE_IRQ2) - do_IRQ(r4030_read_reg32(JAZZ_EISA_IRQ_ACK), regs); + do_IRQ(r4030_read_reg32(JAZZ_EISA_IRQ_ACK)); else if (pending & IE_IRQ1) { - ll_local_dev(regs); + ll_local_dev(); } else if (unlikely(pending & IE_IRQ0)) panic("Unimplemented local_dma handler"); else if (pending & IE_SW1) { diff --git a/arch/mips/jazz/setup.c b/arch/mips/jazz/setup.c index 487a9ea1ef00..d848f1a07786 100644 --- a/arch/mips/jazz/setup.c +++ b/arch/mips/jazz/setup.c @@ -19,12 +19,12 @@ #include <linux/fb.h> #include <linux/ide.h> #include <linux/pm.h> +#include <linux/screen_info.h> #include <asm/bootinfo.h> #include <asm/irq.h> #include <asm/jazz.h> #include <asm/jazzdma.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/io.h> #include <asm/pgtable.h> @@ -37,7 +37,7 @@ extern void jazz_machine_restart(char *command); extern void jazz_machine_halt(void); extern void jazz_machine_power_off(void); -void __init plat_time_init(struct irqaction *irq) +void __init plat_timer_setup(struct irqaction *irq) { /* set the clock to 100 Hz */ r4030_write_reg32(JAZZ_TIMER_INTERVAL, 9); @@ -45,10 +45,27 @@ void __init plat_time_init(struct irqaction *irq) } static struct resource jazz_io_resources[] = { - { "dma1", 0x00, 0x1f, IORESOURCE_BUSY }, - { "timer", 0x40, 0x5f, IORESOURCE_BUSY }, - { "dma page reg", 0x80, 0x8f, IORESOURCE_BUSY }, - { "dma2", 0xc0, 0xdf, IORESOURCE_BUSY }, + { + .start = 0x00, + .end = 0x1f, + .name = "dma1", + .flags = IORESOURCE_BUSY + }, { + .start = 0x40, + .end = 0x5f, + .name = "timer", + .end = IORESOURCE_BUSY + }, { + .start = 0x80, + .end = 0x8f, + .name = "dma page reg", + .flags = IORESOURCE_BUSY + }, { + .start = 0xc0, + .end = 0xdf, + .name = "dma2", + .flags = IORESOURCE_BUSY + } }; void __init plat_mem_setup(void) @@ -81,8 +98,6 @@ void __init plat_mem_setup(void) _machine_halt = jazz_machine_halt; pm_power_off = jazz_machine_power_off; -#warning "Somebody should check if screen_info is ok for Jazz." - screen_info = (struct screen_info) { 0, 0, /* orig-x, orig-y */ 0, /* unused */ diff --git a/arch/mips/jmr3927/rbhma3100/irq.c b/arch/mips/jmr3927/rbhma3100/irq.c index 722174481467..39a0243bed9a 100644 --- a/arch/mips/jmr3927/rbhma3100/irq.c +++ b/arch/mips/jmr3927/rbhma3100/irq.c @@ -46,6 +46,7 @@ #include <linux/smp_lock.h> #include <linux/bitops.h> +#include <asm/irq_regs.h> #include <asm/io.h> #include <asm/mipsregs.h> #include <asm/system.h> @@ -239,45 +240,80 @@ struct tb_irq_space jmr3927_ioc_irqspace = { .space_id = 0, can_share : 1 }; + struct tb_irq_space jmr3927_irc_irqspace = { - .next = NULL, - .start_irqno = JMR3927_IRQ_IRC, - nr_irqs : JMR3927_NR_IRQ_IRC, - .mask_func = mask_irq_irc, - .unmask_func = unmask_irq_irc, - .name = "on-chip", - .space_id = 0, - can_share : 0 + .next = NULL, + .start_irqno = JMR3927_IRQ_IRC, + .nr_irqs = JMR3927_NR_IRQ_IRC, + .mask_func = mask_irq_irc, + .unmask_func = unmask_irq_irc, + .name = "on-chip", + .space_id = 0, + .can_share = 0 }; -void jmr3927_spurious(struct pt_regs *regs) + +#ifdef CONFIG_TX_BRANCH_LIKELY_BUG_WORKAROUND +static int tx_branch_likely_bug_count = 0; +static int have_tx_branch_likely_bug = 0; + +static void tx_branch_likely_bug_fixup(void) +{ + struct pt_regs *regs = get_irq_regs(); + + /* TX39/49-BUG: Under this condition, the insn in delay slot + of the branch likely insn is executed (not nullified) even + the branch condition is false. */ + if (!have_tx_branch_likely_bug) + return; + if ((regs->cp0_epc & 0xfff) == 0xffc && + KSEGX(regs->cp0_epc) != KSEG0 && + KSEGX(regs->cp0_epc) != KSEG1) { + unsigned int insn = *(unsigned int*)(regs->cp0_epc - 4); + /* beql,bnel,blezl,bgtzl */ + /* bltzl,bgezl,blezall,bgezall */ + /* bczfl, bcztl */ + if ((insn & 0xf0000000) == 0x50000000 || + (insn & 0xfc0e0000) == 0x04020000 || + (insn & 0xf3fe0000) == 0x41020000) { + regs->cp0_epc -= 4; + tx_branch_likely_bug_count++; + printk(KERN_INFO + "fix branch-likery bug in %s (insn %08x)\n", + current->comm, insn); + } + } +} +#endif + +static void jmr3927_spurious(void) { #ifdef CONFIG_TX_BRANCH_LIKELY_BUG_WORKAROUND - tx_branch_likely_bug_fixup(regs); + tx_branch_likely_bug_fixup(); #endif printk(KERN_WARNING "spurious interrupt (cause 0x%lx, pc 0x%lx, ra 0x%lx).\n", regs->cp0_cause, regs->cp0_epc, regs->regs[31]); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { int irq; #ifdef CONFIG_TX_BRANCH_LIKELY_BUG_WORKAROUND - tx_branch_likely_bug_fixup(regs); + tx_branch_likely_bug_fixup(); #endif if ((regs->cp0_cause & CAUSEF_IP7) == 0) { #if 0 - jmr3927_spurious(regs); + jmr3927_spurious(); #endif return; } irq = (regs->cp0_cause >> CAUSEB_IP2) & 0x0f; - do_IRQ(irq + JMR3927_IRQ_IRC, regs); + do_IRQ(irq + JMR3927_IRQ_IRC); } -static irqreturn_t jmr3927_ioc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t jmr3927_ioc_interrupt(int irq, void *dev_id) { unsigned char istat = jmr3927_ioc_reg_in(JMR3927_IOC_INTS2_ADDR); int i; @@ -285,7 +321,7 @@ static irqreturn_t jmr3927_ioc_interrupt(int irq, void *dev_id, struct pt_regs * for (i = 0; i < JMR3927_NR_IRQ_IOC; i++) { if (istat & (1 << i)) { irq = JMR3927_IRQ_IOC + i; - do_IRQ(irq, regs); + do_IRQ(irq); } } return IRQ_HANDLED; @@ -295,7 +331,7 @@ static struct irqaction ioc_action = { jmr3927_ioc_interrupt, 0, CPU_MASK_NONE, "IOC", NULL, NULL, }; -static irqreturn_t jmr3927_isac_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t jmr3927_isac_interrupt(int irq, void *dev_id) { unsigned char istat = jmr3927_isac_reg_in(JMR3927_ISAC_INTS2_ADDR); int i; @@ -303,7 +339,7 @@ static irqreturn_t jmr3927_isac_interrupt(int irq, void *dev_id, struct pt_regs for (i = 0; i < JMR3927_NR_IRQ_ISAC; i++) { if (istat & (1 << i)) { irq = JMR3927_IRQ_ISAC + i; - do_IRQ(irq, regs); + do_IRQ(irq); } } return IRQ_HANDLED; @@ -314,7 +350,7 @@ static struct irqaction isac_action = { }; -static irqreturn_t jmr3927_isaerr_interrupt(int irq, void * dev_id, struct pt_regs * regs) +static irqreturn_t jmr3927_isaerr_interrupt(int irq, void *dev_id) { printk(KERN_WARNING "ISA error interrupt (irq 0x%x).\n", irq); @@ -324,7 +360,7 @@ static struct irqaction isaerr_action = { jmr3927_isaerr_interrupt, 0, CPU_MASK_NONE, "ISA error", NULL, NULL, }; -static irqreturn_t jmr3927_pcierr_interrupt(int irq, void * dev_id, struct pt_regs * regs) +static irqreturn_t jmr3927_pcierr_interrupt(int irq, void *dev_id) { printk(KERN_WARNING "PCI error interrupt (irq 0x%x).\n", irq); printk(KERN_WARNING "pcistat:%02x, lbstat:%04lx\n", @@ -439,33 +475,3 @@ void jmr3927_irq_init(u32 irq_base) jmr3927_irq_base = irq_base; } - -#ifdef CONFIG_TX_BRANCH_LIKELY_BUG_WORKAROUND -static int tx_branch_likely_bug_count = 0; -static int have_tx_branch_likely_bug = 0; -void tx_branch_likely_bug_fixup(struct pt_regs *regs) -{ - /* TX39/49-BUG: Under this condition, the insn in delay slot - of the branch likely insn is executed (not nullified) even - the branch condition is false. */ - if (!have_tx_branch_likely_bug) - return; - if ((regs->cp0_epc & 0xfff) == 0xffc && - KSEGX(regs->cp0_epc) != KSEG0 && - KSEGX(regs->cp0_epc) != KSEG1) { - unsigned int insn = *(unsigned int*)(regs->cp0_epc - 4); - /* beql,bnel,blezl,bgtzl */ - /* bltzl,bgezl,blezall,bgezall */ - /* bczfl, bcztl */ - if ((insn & 0xf0000000) == 0x50000000 || - (insn & 0xfc0e0000) == 0x04020000 || - (insn & 0xf3fe0000) == 0x41020000) { - regs->cp0_epc -= 4; - tx_branch_likely_bug_count++; - printk(KERN_INFO - "fix branch-likery bug in %s (insn %08x)\n", - current->comm, insn); - } - } -} -#endif diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index ec28077d5ee2..e9ce5b3721af 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -93,11 +93,12 @@ void output_thread_info_defines(void) offset("#define TI_TASK ", struct thread_info, task); offset("#define TI_EXEC_DOMAIN ", struct thread_info, exec_domain); offset("#define TI_FLAGS ", struct thread_info, flags); + offset("#define TI_TP_VALUE ", struct thread_info, tp_value); offset("#define TI_CPU ", struct thread_info, cpu); offset("#define TI_PRE_COUNT ", struct thread_info, preempt_count); offset("#define TI_ADDR_LIMIT ", struct thread_info, addr_limit); offset("#define TI_RESTART_BLOCK ", struct thread_info, restart_block); - offset("#define TI_TP_VALUE ", struct thread_info, tp_value); + offset("#define TI_REGS ", struct thread_info, regs); constant("#define _THREAD_SIZE_ORDER ", THREAD_SIZE_ORDER); constant("#define _THREAD_SIZE ", THREAD_SIZE); constant("#define _THREAD_MASK ", THREAD_MASK); diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 9fbf8430c849..8485af340ee1 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -135,7 +135,6 @@ static inline void check_wait(void) case CPU_R5000: case CPU_NEVADA: case CPU_RM7000: - case CPU_RM9000: case CPU_4KC: case CPU_4KEC: case CPU_4KSC: @@ -164,6 +163,14 @@ static inline void check_wait(void) } else printk(" unavailable.\n"); break; + case CPU_RM9000: + if ((c->processor_id & 0x00ff) >= 0x40) { + cpu_wait = r4k_wait; + printk(" available.\n"); + } else { + printk(" unavailable.\n"); + } + break; default: printk(" unavailable.\n"); break; diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 766655f35250..417c08ac76eb 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -20,10 +20,7 @@ #include <asm/mipsmtregs.h> #endif -#ifdef CONFIG_PREEMPT - .macro preempt_stop - .endm -#else +#ifndef CONFIG_PREEMPT .macro preempt_stop local_irq_disable .endm @@ -32,9 +29,16 @@ .text .align 5 +FEXPORT(ret_from_irq) + LONG_S s0, TI_REGS($28) +#ifdef CONFIG_PREEMPT +FEXPORT(ret_from_exception) +#else + b _ret_from_irq FEXPORT(ret_from_exception) preempt_stop -FEXPORT(ret_from_irq) +#endif +FEXPORT(_ret_from_irq) LONG_L t0, PT_STATUS(sp) # returning to kernel mode? andi t0, t0, KU_USER beqz t0, resume_kernel @@ -79,7 +83,6 @@ FEXPORT(syscall_exit) FEXPORT(restore_all) # restore full frame #ifdef CONFIG_MIPS_MT_SMTC /* Detect and execute deferred IPI "interrupts" */ - move a0,sp jal deferred_smtc_ipi /* Re-arm any temporarily masked interrupts not explicitly "acked" */ mfc0 v0, CP0_TCSTATUS diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index af6ef2fd8300..5baca16993d0 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -131,8 +131,9 @@ NESTED(handle_int, PT_SIZE, sp) CLI TRACE_IRQS_OFF + LONG_L s0, TI_REGS($28) + LONG_S sp, TI_REGS($28) PTR_LA ra, ret_from_irq - move a0, sp j plat_irq_dispatch END(handle_int) @@ -219,7 +220,9 @@ NESTED(except_vec_vi_handler, 0, sp) #endif /* CONFIG_MIPS_MT_SMTC */ CLI TRACE_IRQS_OFF - move a0, sp + + LONG_L s0, TI_REGS($28) + LONG_S sp, TI_REGS($28) PTR_LA ra, ret_from_irq jr v0 END(except_vec_vi_handler) diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c index 63dfeb41796b..650a80ca3741 100644 --- a/arch/mips/kernel/irq-msc01.c +++ b/arch/mips/kernel/irq-msc01.c @@ -1,16 +1,17 @@ /* - * Copyright (c) 2004 MIPS Inc - * Author: chris@mips.com - * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. + * + * Copyright (c) 2004 MIPS Inc + * Author: chris@mips.com + * + * Copyright (C) 2004, 06 Ralf Baechle <ralf@linux-mips.org> */ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/kernel.h> -#include <asm/ptrace.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <asm/io.h> @@ -115,14 +116,14 @@ static void end_msc_irq(unsigned int irq) /* * Interrupt handler for interrupts coming from SOC-it. */ -void ll_msc_irq(struct pt_regs *regs) +void ll_msc_irq(void) { unsigned int irq; /* read the interrupt vector register */ MSCIC_READ(MSC01_IC_VEC, irq); if (irq < 64) - do_IRQ(irq + irq_base, regs); + do_IRQ(irq + irq_base); else { /* Ignore spurious interrupt */ } diff --git a/arch/mips/kernel/irq-mv6434x.c b/arch/mips/kernel/irq-mv6434x.c index b117e64da64d..37d106202b83 100644 --- a/arch/mips/kernel/irq-mv6434x.c +++ b/arch/mips/kernel/irq-mv6434x.c @@ -1,7 +1,7 @@ /* * Copyright 2002 Momentum Computer * Author: mdharm@momenco.com - * Copyright (C) 2004 Ralf Baechle <ralf@linux-mips.org> + * Copyright (C) 2004, 06 Ralf Baechle <ralf@linux-mips.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -15,7 +15,6 @@ #include <linux/mv643xx.h> #include <linux/sched.h> -#include <asm/ptrace.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/marvell.h> @@ -113,7 +112,7 @@ static void end_mv64340_irq(unsigned int irq) * Interrupt handler for interrupts coming from the Marvell chip. * It could be built in ethernet ports etc... */ -void ll_mv64340_irq(struct pt_regs *regs) +void ll_mv64340_irq(void) { unsigned int irq_src_low, irq_src_high; unsigned int irq_mask_low, irq_mask_high; @@ -129,9 +128,9 @@ void ll_mv64340_irq(struct pt_regs *regs) irq_src_high &= irq_mask_high; if (irq_src_low) - do_IRQ(ls1bit32(irq_src_low) + irq_base, regs); + do_IRQ(ls1bit32(irq_src_low) + irq_base); else - do_IRQ(ls1bit32(irq_src_high) + irq_base + 32, regs); + do_IRQ(ls1bit32(irq_src_high) + irq_base + 32); } #define shutdown_mv64340_irq disable_mv64340_irq diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index a00b0e7ab9b1..dd24434392b6 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -53,9 +53,8 @@ unsigned long irq_hwmask[NR_IRQS]; * SMP cross-CPU interrupts have their own specific * handlers). */ -asmlinkage unsigned int do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage unsigned int do_IRQ(unsigned int irq) { - struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); __DO_IRQ_SMTC_HOOK(); @@ -63,7 +62,6 @@ asmlinkage unsigned int do_IRQ(unsigned int irq, struct pt_regs *regs) irq_exit(); - set_irq_regs(old_regs); return 1; } @@ -112,7 +110,7 @@ skip: return 0; } -asmlinkage void spurious_interrupt(struct pt_regs *regs) +asmlinkage void spurious_interrupt(void) { atomic_inc(&irq_err_count); } diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index d8beef107902..4ed37ba19731 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -89,9 +89,9 @@ static const char *cpu_name[] = { static int show_cpuinfo(struct seq_file *m, void *v) { - unsigned int version = current_cpu_data.processor_id; - unsigned int fp_vers = current_cpu_data.fpu_id; unsigned long n = (unsigned long) v - 1; + unsigned int version = cpu_data[n].processor_id; + unsigned int fp_vers = cpu_data[n].fpu_id; char fmt [64]; #ifdef CONFIG_SMP @@ -107,9 +107,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "processor\t\t: %ld\n", n); sprintf(fmt, "cpu model\t\t: %%s V%%d.%%d%s\n", - cpu_has_fpu ? " FPU V%d.%d" : ""); - seq_printf(m, fmt, cpu_name[current_cpu_data.cputype <= CPU_LAST ? - current_cpu_data.cputype : CPU_UNKNOWN], + cpu_data[n].options & MIPS_CPU_FPU ? " FPU V%d.%d" : ""); + seq_printf(m, fmt, cpu_name[cpu_data[n].cputype <= CPU_LAST ? + cpu_data[n].cputype : CPU_UNKNOWN], (version >> 4) & 0x0f, version & 0x0f, (fp_vers >> 4) & 0x0f, fp_vers & 0x0f); seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n", @@ -118,7 +118,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no"); seq_printf(m, "microsecond timers\t: %s\n", cpu_has_counter ? "yes" : "no"); - seq_printf(m, "tlb_entries\t\t: %d\n", current_cpu_data.tlbsize); + seq_printf(m, "tlb_entries\t\t: %d\n", cpu_data[n].tlbsize); seq_printf(m, "extra interrupt vector\t: %s\n", cpu_has_divec ? "yes" : "no"); seq_printf(m, "hardware watchpoint\t: %s\n", diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 045d987bc683..9f307eb1a31e 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -115,7 +115,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) status |= KU_USER; regs->cp0_status = status; clear_used_math(); - lose_fpu(); + clear_fpu_owner(); if (cpu_has_dsp) __init_dsp(); regs->cp0_epc = pc; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 362d1728e531..258d74fd0b63 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -106,6 +106,7 @@ int ptrace_setregs (struct task_struct *child, __s64 __user *data) int ptrace_getfpregs (struct task_struct *child, __u32 __user *data) { int i; + unsigned int tmp; if (!access_ok(VERIFY_WRITE, data, 33 * 8)) return -EIO; @@ -121,10 +122,10 @@ int ptrace_getfpregs (struct task_struct *child, __u32 __user *data) __put_user (child->thread.fpu.fcr31, data + 64); + preempt_disable(); if (cpu_has_fpu) { - unsigned int flags, tmp; + unsigned int flags; - preempt_disable(); if (cpu_has_mipsmt) { unsigned int vpflags = dvpe(); flags = read_c0_status(); @@ -138,11 +139,11 @@ int ptrace_getfpregs (struct task_struct *child, __u32 __user *data) __asm__ __volatile__("cfc1\t%0,$0" : "=r" (tmp)); write_c0_status(flags); } - preempt_enable(); - __put_user (tmp, data + 65); } else { - __put_user ((__u32) 0, data + 65); + tmp = 0; } + preempt_enable(); + __put_user (tmp, data + 65); return 0; } @@ -245,16 +246,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) unsigned int mtflags; #endif /* CONFIG_MIPS_MT_SMTC */ - if (!cpu_has_fpu) + preempt_disable(); + if (!cpu_has_fpu) { + preempt_enable(); break; + } #ifdef CONFIG_MIPS_MT_SMTC /* Read-modify-write of Status must be atomic */ local_irq_save(irqflags); mtflags = dmt(); #endif /* CONFIG_MIPS_MT_SMTC */ - - preempt_disable(); if (cpu_has_mipsmt) { unsigned int vpflags = dvpe(); flags = read_c0_status(); diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index f40ecd8be05f..d9a39c169450 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -175,7 +175,9 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data) unsigned int mtflags; #endif /* CONFIG_MIPS_MT_SMTC */ + preempt_disable(); if (!cpu_has_fpu) { + preempt_enable(); tmp = 0; break; } @@ -186,7 +188,6 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data) mtflags = dmt(); #endif /* CONFIG_MIPS_MT_SMTC */ - preempt_disable(); if (cpu_has_mipsmt) { unsigned int vpflags = dvpe(); flags = read_c0_status(); diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index cdab1b2cd134..8c8c8324f775 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -61,16 +61,16 @@ static int sp_stopping = 0; extern void *vpe_get_shared(int index); -static void rtlx_dispatch(struct pt_regs *regs) +static void rtlx_dispatch(void) { - do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ, regs); + do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ); } /* Interrupt handler may be called before rtlx_init has otherwise had a chance to run. */ -static irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t rtlx_interrupt(int irq, void *dev_id) { int i; diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 766253c44f3f..3b5f3b632622 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -106,22 +106,22 @@ void __init sanitize_tlb_entries(void) clear_c0_mvpcontrol(MVPCONTROL_VPC); } -static void ipi_resched_dispatch (struct pt_regs *regs) +static void ipi_resched_dispatch(void) { - do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ, regs); + do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ); } -static void ipi_call_dispatch (struct pt_regs *regs) +static void ipi_call_dispatch(void) { - do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_IPI_CALL_IRQ, regs); + do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_IPI_CALL_IRQ); } -irqreturn_t ipi_resched_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) { return IRQ_HANDLED; } -irqreturn_t ipi_call_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) { smp_call_function_interrupt(); @@ -250,8 +250,8 @@ void __init plat_prepare_cpus(unsigned int max_cpus) { /* set up ipi interrupts */ if (cpu_has_vint) { - set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch); - set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch); + set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch); + set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch); } cpu_ipi_resched_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ; diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 221895802dca..db80957ada89 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -310,7 +310,7 @@ static void flush_tlb_all_ipi(void *info) void flush_tlb_all(void) { - on_each_cpu(flush_tlb_all_ipi, 0, 1, 1); + on_each_cpu(flush_tlb_all_ipi, NULL, 1, 1); } static void flush_tlb_mm_ipi(void *mm) @@ -467,14 +467,18 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); static int __init topology_init(void) { - int cpu; - int ret; + int i, ret; - for_each_present_cpu(cpu) { - ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu); +#ifdef CONFIG_NUMA + for_each_online_node(i) + register_one_node(i); +#endif /* CONFIG_NUMA */ + + for_each_present_cpu(i) { + ret = register_cpu(&per_cpu(cpu_devices, i), i); if (ret) printk(KERN_WARNING "topology_init: register_cpu %d " - "failed (%d)\n", cpu, ret); + "failed (%d)\n", i, ret); } return 0; diff --git a/arch/mips/kernel/smtc-asm.S b/arch/mips/kernel/smtc-asm.S index 76cb31d57482..1cb9441f1474 100644 --- a/arch/mips/kernel/smtc-asm.S +++ b/arch/mips/kernel/smtc-asm.S @@ -97,15 +97,12 @@ FEXPORT(__smtc_ipi_vector) SAVE_ALL CLI TRACE_IRQS_OFF - move a0,sp /* Function to be invoked passed stack pad slot 5 */ lw t0,PT_PADSLOT5(sp) /* Argument from sender passed in stack pad slot 4 */ - lw a1,PT_PADSLOT4(sp) - jalr t0 - nop - j ret_from_irq - nop + lw a0,PT_PADSLOT4(sp) + PTR_LA ra, _ret_from_irq + jr t0 /* * Called from idle loop to provoke processing of queued IPIs diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 604bcc5cb7c8..cc1f7474f7d7 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -82,7 +82,7 @@ struct smtc_ipi_q freeIPIq; /* Forward declarations */ -void ipi_decode(struct pt_regs *, struct smtc_ipi *); +void ipi_decode(struct smtc_ipi *); void post_direct_ipi(int cpu, struct smtc_ipi *pipi); void setup_cross_vpe_interrupts(void); void init_smtc_stats(void); @@ -820,19 +820,19 @@ void post_direct_ipi(int cpu, struct smtc_ipi *pipi) write_tc_c0_tcrestart(__smtc_ipi_vector); } -void ipi_resched_interrupt(struct pt_regs *regs) +static void ipi_resched_interrupt(void) { /* Return from interrupt should be enough to cause scheduler check */ } -void ipi_call_interrupt(struct pt_regs *regs) +static void ipi_call_interrupt(void) { /* Invoke generic function invocation code in smp.c */ smp_call_function_interrupt(); } -void ipi_decode(struct pt_regs *regs, struct smtc_ipi *pipi) +void ipi_decode(struct smtc_ipi *pipi) { void *arg_copy = pipi->arg; int type_copy = pipi->type; @@ -846,15 +846,15 @@ void ipi_decode(struct pt_regs *regs, struct smtc_ipi *pipi) #ifdef SMTC_IDLE_HOOK_DEBUG clock_hang_reported[dest_copy] = 0; #endif /* SMTC_IDLE_HOOK_DEBUG */ - local_timer_interrupt(0, NULL, regs); + local_timer_interrupt(0, NULL); break; case LINUX_SMP_IPI: switch ((int)arg_copy) { case SMP_RESCHEDULE_YOURSELF: - ipi_resched_interrupt(regs); + ipi_resched_interrupt(); break; case SMP_CALL_FUNCTION: - ipi_call_interrupt(regs); + ipi_call_interrupt(); break; default: printk("Impossible SMTC IPI Argument 0x%x\n", @@ -868,7 +868,7 @@ void ipi_decode(struct pt_regs *regs, struct smtc_ipi *pipi) } } -void deferred_smtc_ipi(struct pt_regs *regs) +void deferred_smtc_ipi(void) { struct smtc_ipi *pipi; unsigned long flags; @@ -883,7 +883,7 @@ void deferred_smtc_ipi(struct pt_regs *regs) while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) { /* ipi_decode() should be called with interrupts off */ local_irq_save(flags); - ipi_decode(regs, pipi); + ipi_decode(pipi); local_irq_restore(flags); } } @@ -917,7 +917,7 @@ void smtc_timer_broadcast(int vpe) static int cpu_ipi_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_IRQ; -static irqreturn_t ipi_interrupt(int irq, void *dev_idm, struct pt_regs *regs) +static irqreturn_t ipi_interrupt(int irq, void *dev_idm) { int my_vpe = cpu_data[smp_processor_id()].vpe_id; int my_tc = cpu_data[smp_processor_id()].tc_id; @@ -978,7 +978,7 @@ static irqreturn_t ipi_interrupt(int irq, void *dev_idm, struct pt_regs *regs) * with interrupts off */ local_irq_save(flags); - ipi_decode(regs, pipi); + ipi_decode(pipi); local_irq_restore(flags); } } @@ -987,9 +987,9 @@ static irqreturn_t ipi_interrupt(int irq, void *dev_idm, struct pt_regs *regs) return IRQ_HANDLED; } -static void ipi_irq_dispatch(struct pt_regs *regs) +static void ipi_irq_dispatch(void) { - do_IRQ(cpu_ipi_irq, regs); + do_IRQ(cpu_ipi_irq); } static struct irqaction irq_ipi; diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index d349eb9e4ffb..debe86c2f691 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -324,8 +324,7 @@ static long last_rtc_update; */ void local_timer_interrupt(int irq, void *dev_id) { - if (current->pid) - profile_tick(CPU_PROFILING); + profile_tick(CPU_PROFILING); update_process_times(user_mode(get_irq_regs())); } @@ -434,9 +433,8 @@ int (*perf_irq)(void) = null_perf_irq; EXPORT_SYMBOL(null_perf_irq); EXPORT_SYMBOL(perf_irq); -asmlinkage void ll_timer_interrupt(int irq, struct pt_regs *regs) +asmlinkage void ll_timer_interrupt(int irq) { - struct pt_regs *old_regs = set_irq_regs(regs); int r2 = cpu_has_mips_r2; irq_enter(); @@ -458,12 +456,10 @@ asmlinkage void ll_timer_interrupt(int irq, struct pt_regs *regs) out: irq_exit(); - set_irq_regs(old_regs); } -asmlinkage void ll_local_timer_interrupt(int irq, struct pt_regs *regs) +asmlinkage void ll_local_timer_interrupt(int irq) { - struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); if (smp_processor_id() != 0) kstat_this_cpu.irqs[irq]++; @@ -472,7 +468,6 @@ asmlinkage void ll_local_timer_interrupt(int irq, struct pt_regs *regs) local_timer_interrupt(irq, NULL); irq_exit(); - set_irq_regs(old_regs); } /* diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b7292a56d4cd..cce8313ec27d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -66,7 +66,7 @@ extern asmlinkage void handle_mcheck(void); extern asmlinkage void handle_reserved(void); extern int fpu_emulator_cop1Handler(struct pt_regs *xcp, - struct mips_fpu_struct *ctx); + struct mips_fpu_struct *ctx, int has_fpu); void (*board_be_init)(void); int (*board_be_handler)(struct pt_regs *regs, int is_fixup); @@ -641,7 +641,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) preempt_enable(); /* Run the emulator */ - sig = fpu_emulator_cop1Handler (regs, ¤t->thread.fpu); + sig = fpu_emulator_cop1Handler (regs, ¤t->thread.fpu, 1); preempt_disable(); @@ -791,11 +791,13 @@ asmlinkage void do_cpu(struct pt_regs *regs) set_used_math(); } - preempt_enable(); - - if (!cpu_has_fpu) { - int sig = fpu_emulator_cop1Handler(regs, - ¤t->thread.fpu); + if (cpu_has_fpu) { + preempt_enable(); + } else { + int sig; + preempt_enable(); + sig = fpu_emulator_cop1Handler(regs, + ¤t->thread.fpu, 0); if (sig) force_sig(sig, current); #ifdef CONFIG_MIPS_MT_FPAFF diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c index 456be8fc961a..a144a002dcc4 100644 --- a/arch/mips/lasat/interrupt.c +++ b/arch/mips/lasat/interrupt.c @@ -108,14 +108,14 @@ static unsigned long get_int_status_200(void) return int_status; } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned long int_status; unsigned int cause = read_c0_cause(); int irq; if (cause & CAUSEF_IP7) { /* R4000 count / compare IRQ */ - ll_timer_interrupt(7, regs); + ll_timer_interrupt(7); return; } @@ -125,7 +125,7 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) if (int_status) { irq = ls1bit32(int_status); - do_IRQ(irq, regs); + do_IRQ(irq); } } diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 3f0d5d26d506..80531b35cd61 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -38,8 +38,6 @@ #include <asm/inst.h> #include <asm/bootinfo.h> -#include <asm/cpu.h> -#include <asm/cpu-features.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/signal.h> @@ -1233,7 +1231,8 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, return 0; } -int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx) +int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + int has_fpu) { unsigned long oldepc, prevepc; mips_instruction insn; @@ -1263,7 +1262,7 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx) ieee754_csr.rm = mips_rm[ieee754_csr.rm]; } - if (cpu_has_fpu) + if (has_fpu) break; if (sig) break; diff --git a/arch/mips/mips-boards/atlas/atlas_int.c b/arch/mips/mips-boards/atlas/atlas_int.c index a020a3cb4f4b..be624b8c3b0e 100644 --- a/arch/mips/mips-boards/atlas/atlas_int.c +++ b/arch/mips/mips-boards/atlas/atlas_int.c @@ -101,7 +101,7 @@ static inline int ls1bit32(unsigned int x) return b; } -static inline void atlas_hw0_irqdispatch(struct pt_regs *regs) +static inline void atlas_hw0_irqdispatch(void) { unsigned long int_status; int irq; @@ -116,7 +116,7 @@ static inline void atlas_hw0_irqdispatch(struct pt_regs *regs) DEBUG_INT("atlas_hw0_irqdispatch: irq=%d\n", irq); - do_IRQ(irq, regs); + do_IRQ(irq); } static inline int clz(unsigned long x) @@ -188,7 +188,7 @@ static inline unsigned int irq_ffs(unsigned int pending) * then we just return, if multiple IRQs are pending then we will just take * another exception, big deal. */ -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; int irq; @@ -196,11 +196,11 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) irq = irq_ffs(pending); if (irq == MIPSCPU_INT_ATLAS) - atlas_hw0_irqdispatch(regs); + atlas_hw0_irqdispatch(); else if (irq >= 0) - do_IRQ(MIPSCPU_INT_BASE + irq, regs); + do_IRQ(MIPSCPU_INT_BASE + irq); else - spurious_interrupt(regs); + spurious_interrupt(); } static inline void init_atlas_irqs (int base) diff --git a/arch/mips/mips-boards/generic/time.c b/arch/mips/mips-boards/generic/time.c index 8d15861fce61..6f8a9fe7c1e3 100644 --- a/arch/mips/mips-boards/generic/time.c +++ b/arch/mips/mips-boards/generic/time.c @@ -30,7 +30,6 @@ #include <asm/mipsregs.h> #include <asm/mipsmtregs.h> -#include <asm/ptrace.h> #include <asm/hardirq.h> #include <asm/irq.h> #include <asm/div64.h> @@ -82,19 +81,19 @@ static inline void scroll_display_message(void) } } -static void mips_timer_dispatch (struct pt_regs *regs) +static void mips_timer_dispatch(void) { - do_IRQ (mips_cpu_timer_irq, regs); + do_IRQ(mips_cpu_timer_irq); } /* * Redeclare until I get around mopping the timer code insanity on MIPS. */ -extern int null_perf_irq(struct pt_regs *regs); +extern int null_perf_irq(void); -extern int (*perf_irq)(struct pt_regs *regs); +extern int (*perf_irq)(void); -irqreturn_t mips_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t mips_timer_interrupt(int irq, void *dev_id) { int cpu = smp_processor_id(); @@ -119,7 +118,7 @@ irqreturn_t mips_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * perf counter overflow, or both. */ if (read_c0_cause() & (1 << 26)) - perf_irq(regs); + perf_irq(); if (read_c0_cause() & (1 << 30)) { /* If timer interrupt, make it de-assert */ @@ -139,13 +138,13 @@ irqreturn_t mips_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * the tick on VPE 0 to run the full timer_interrupt(). */ if (cpu_data[cpu].vpe_id == 0) { - timer_interrupt(irq, NULL, regs); + timer_interrupt(irq, NULL); smtc_timer_broadcast(cpu_data[cpu].vpe_id); scroll_display_message(); } else { write_c0_compare(read_c0_count() + (mips_hpt_frequency/HZ)); - local_timer_interrupt(irq, dev_id, regs); + local_timer_interrupt(irq, dev_id); smtc_timer_broadcast(cpu_data[cpu].vpe_id); } } @@ -159,12 +158,12 @@ irqreturn_t mips_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * timer int. */ if (!r2 || (read_c0_cause() & (1 << 26))) - if (perf_irq(regs)) + if (perf_irq()) goto out; /* we keep interrupt disabled all the time */ if (!r2 || (read_c0_cause() & (1 << 30))) - timer_interrupt(irq, NULL, regs); + timer_interrupt(irq, NULL); scroll_display_message(); } else { @@ -180,7 +179,7 @@ irqreturn_t mips_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) /* * Other CPUs should do profiling and process accounting */ - local_timer_interrupt(irq, dev_id, regs); + local_timer_interrupt(irq, dev_id); } out: #endif /* CONFIG_MIPS_MT_SMTC */ diff --git a/arch/mips/mips-boards/malta/malta_int.c b/arch/mips/mips-boards/malta/malta_int.c index 7cc0ba4f553a..6244d0e2c7de 100644 --- a/arch/mips/mips-boards/malta/malta_int.c +++ b/arch/mips/mips-boards/malta/malta_int.c @@ -114,7 +114,7 @@ static inline int get_int(void) return irq; } -static void malta_hw0_irqdispatch(struct pt_regs *regs) +static void malta_hw0_irqdispatch(void) { int irq; @@ -123,17 +123,21 @@ static void malta_hw0_irqdispatch(struct pt_regs *regs) return; /* interrupt has already been cleared */ } - do_IRQ(MALTA_INT_BASE+irq, regs); + do_IRQ(MALTA_INT_BASE + irq); } -void corehi_irqdispatch(struct pt_regs *regs) +static void corehi_irqdispatch(void) { + unsigned int intedge, intsteer, pcicmd, pcibadaddr; + unsigned int pcimstat, intisr, inten, intpol; unsigned int intrcause,datalo,datahi; - unsigned int pcimstat, intisr, inten, intpol, intedge, intsteer, pcicmd, pcibadaddr; + struct pt_regs *regs; printk("CoreHI interrupt, shouldn't happen, so we die here!!!\n"); - printk("epc : %08lx\nStatus: %08lx\nCause : %08lx\nbadVaddr : %08lx\n" -, regs->cp0_epc, regs->cp0_status, regs->cp0_cause, regs->cp0_badvaddr); + printk("epc : %08lx\nStatus: %08lx\n" + "Cause : %08lx\nbadVaddr : %08lx\n", + regs->cp0_epc, regs->cp0_status, + regs->cp0_cause, regs->cp0_badvaddr); /* Read all the registers and then print them as there is a problem with interspersed printk's upsetting the Bonito controller. @@ -146,7 +150,7 @@ void corehi_irqdispatch(struct pt_regs *regs) case MIPS_REVISION_CORID_CORE_FPGA3: case MIPS_REVISION_CORID_CORE_24K: case MIPS_REVISION_CORID_CORE_EMUL_MSC: - ll_msc_irq(regs); + ll_msc_irq(); break; case MIPS_REVISION_CORID_QED_RM5261: case MIPS_REVISION_CORID_CORE_LV: @@ -208,23 +212,23 @@ static inline unsigned int irq_ffs(unsigned int pending) unsigned int a0 = 7; unsigned int t0; - t0 = s0 & 0xf000; + t0 = pending & 0xf000; t0 = t0 < 1; t0 = t0 << 2; a0 = a0 - t0; - s0 = s0 << t0; + pending = pending << t0; - t0 = s0 & 0xc000; + t0 = pending & 0xc000; t0 = t0 < 1; t0 = t0 << 1; a0 = a0 - t0; - s0 = s0 << t0; + pending = pending << t0; - t0 = s0 & 0x8000; + t0 = pending & 0x8000; t0 = t0 < 1; //t0 = t0 << 2; a0 = a0 - t0; - //s0 = s0 << t0; + //pending = pending << t0; return a0; #endif @@ -255,7 +259,7 @@ static inline unsigned int irq_ffs(unsigned int pending) * another exception, big deal. */ -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; int irq; @@ -263,11 +267,11 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) irq = irq_ffs(pending); if (irq == MIPSCPU_INT_I8259A) - malta_hw0_irqdispatch(regs); + malta_hw0_irqdispatch(); else if (irq > 0) - do_IRQ(MIPSCPU_INT_BASE + irq, regs); + do_IRQ(MIPSCPU_INT_BASE + irq); else - spurious_interrupt(regs); + spurious_interrupt(); } static struct irqaction i8259irq = { diff --git a/arch/mips/mips-boards/sead/sead_int.c b/arch/mips/mips-boards/sead/sead_int.c index 9168d934c661..f445fcddfdfd 100644 --- a/arch/mips/mips-boards/sead/sead_int.c +++ b/arch/mips/mips-boards/sead/sead_int.c @@ -98,7 +98,7 @@ static inline unsigned int irq_ffs(unsigned int pending) * then we just return, if multiple IRQs are pending then we will just take * another exception, big deal. */ -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; int irq; @@ -106,7 +106,7 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) irq = irq_ffs(pending); if (irq >= 0) - do_IRQ(MIPSCPU_INT_BASE + irq, regs); + do_IRQ(MIPSCPU_INT_BASE + irq); else spurious_interrupt(regs); } diff --git a/arch/mips/mips-boards/sim/sim_int.c b/arch/mips/mips-boards/sim/sim_int.c index 2c15c8efec4e..2ce449dce6f2 100644 --- a/arch/mips/mips-boards/sim/sim_int.c +++ b/arch/mips/mips-boards/sim/sim_int.c @@ -71,12 +71,7 @@ static inline unsigned int irq_ffs(unsigned int pending) #endif } -static inline void sim_hw0_irqdispatch(struct pt_regs *regs) -{ - do_IRQ(2, regs); -} - -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; int irq; @@ -84,9 +79,9 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) irq = irq_ffs(pending); if (irq > 0) - do_IRQ(MIPSCPU_INT_BASE + irq, regs); + do_IRQ(MIPSCPU_INT_BASE + irq); else - spurious_interrupt(regs); + spurious_interrupt(); } void __init arch_init_irq(void) diff --git a/arch/mips/mips-boards/sim/sim_time.c b/arch/mips/mips-boards/sim/sim_time.c index 230929ecd57f..c566b9bd0427 100644 --- a/arch/mips/mips-boards/sim/sim_time.c +++ b/arch/mips/mips-boards/sim/sim_time.c @@ -15,7 +15,6 @@ #include <linux/mc146818rtc.h> #include <linux/timex.h> #include <asm/mipsregs.h> -#include <asm/ptrace.h> #include <asm/hardirq.h> #include <asm/irq.h> #include <asm/div64.h> @@ -33,7 +32,7 @@ unsigned long cpu_khz; -irqreturn_t sim_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t sim_timer_interrupt(int irq, void *dev_id) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); @@ -44,7 +43,7 @@ irqreturn_t sim_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ #ifndef CONFIG_MIPS_MT_SMTC if (cpu == 0) { - timer_interrupt(irq, dev_id, regs); + timer_interrupt(irq, dev_id); } else { /* Everyone else needs to reset the timer int here as @@ -84,7 +83,7 @@ irqreturn_t sim_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) irq_enable_hazard(); evpe(vpflags); - if(cpu_data[cpu].vpe_id == 0) timer_interrupt(irq, dev_id, regs); + if(cpu_data[cpu].vpe_id == 0) timer_interrupt(irq, dev_id); else write_c0_compare (read_c0_count() + ( mips_hpt_frequency/HZ)); smtc_timer_broadcast(cpu_data[cpu].vpe_id); @@ -93,10 +92,10 @@ irqreturn_t sim_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) /* * every CPU should do profiling and process accounting */ - local_timer_interrupt (irq, dev_id, regs); + local_timer_interrupt (irq, dev_id); return IRQ_HANDLED; #else - return timer_interrupt (irq, dev_id, regs); + return timer_interrupt (irq, dev_id); #endif } @@ -177,9 +176,9 @@ void __init sim_time_init(void) static int mips_cpu_timer_irq; -static void mips_timer_dispatch (struct pt_regs *regs) +static void mips_timer_dispatch(void) { - do_IRQ (mips_cpu_timer_irq, regs); + do_IRQ(mips_cpu_timer_irq); } diff --git a/arch/mips/momentum/jaguar_atx/irq.c b/arch/mips/momentum/jaguar_atx/irq.c index f9067469a656..2efb25aa1aed 100644 --- a/arch/mips/momentum/jaguar_atx/irq.c +++ b/arch/mips/momentum/jaguar_atx/irq.c @@ -40,33 +40,33 @@ #include <asm/mipsregs.h> #include <asm/time.h> -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status(); if (pending & STATUSF_IP0) - do_IRQ(0, regs); + do_IRQ(0); else if (pending & STATUSF_IP1) - do_IRQ(1, regs); + do_IRQ(1); else if (pending & STATUSF_IP2) - do_IRQ(2, regs); + do_IRQ(2); else if (pending & STATUSF_IP3) - do_IRQ(3, regs); + do_IRQ(3); else if (pending & STATUSF_IP4) - do_IRQ(4, regs); + do_IRQ(4); else if (pending & STATUSF_IP5) - do_IRQ(5, regs); + do_IRQ(5); else if (pending & STATUSF_IP6) - do_IRQ(6, regs); + do_IRQ(6); else if (pending & STATUSF_IP7) - ll_timer_interrupt(7, regs); + ll_timer_interrupt(7); else { /* * Now look at the extended interrupts */ pending = (read_c0_cause() & (read_c0_intcontrol() << 8)) >> 16; if (pending & STATUSF_IP8) - ll_mv64340_irq(regs); + ll_mv64340_irq(); } } diff --git a/arch/mips/momentum/jaguar_atx/setup.c b/arch/mips/momentum/jaguar_atx/setup.c index e6fe2992227d..5a510142b978 100644 --- a/arch/mips/momentum/jaguar_atx/setup.c +++ b/arch/mips/momentum/jaguar_atx/setup.c @@ -62,7 +62,6 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/tlbflush.h> diff --git a/arch/mips/momentum/ocelot_3/irq.c b/arch/mips/momentum/ocelot_3/irq.c index 793782a9c195..cea0e5deb80e 100644 --- a/arch/mips/momentum/ocelot_3/irq.c +++ b/arch/mips/momentum/ocelot_3/irq.c @@ -75,26 +75,26 @@ void __init arch_init_irq(void) } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status(); if (pending & STATUSF_IP0) - do_IRQ(0, regs); + do_IRQ(0); else if (pending & STATUSF_IP1) - do_IRQ(1, regs); + do_IRQ(1); else if (pending & STATUSF_IP2) - do_IRQ(2, regs); + do_IRQ(2); else if (pending & STATUSF_IP3) - do_IRQ(3, regs); + do_IRQ(3); else if (pending & STATUSF_IP4) - do_IRQ(4, regs); + do_IRQ(4); else if (pending & STATUSF_IP5) - do_IRQ(5, regs); + do_IRQ(5); else if (pending & STATUSF_IP6) - do_IRQ(6, regs); + do_IRQ(6); else if (pending & STATUSF_IP7) - do_IRQ(7, regs); + do_IRQ(7); else { /* * Now look at the extended interrupts @@ -102,8 +102,8 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) pending = (read_c0_cause() & (read_c0_intcontrol() << 8)) >> 16; if (pending & STATUSF_IP8) - ll_mv64340_irq(regs); + ll_mv64340_irq(); else - spurious_interrupt(regs); + spurious_interrupt(); } } diff --git a/arch/mips/momentum/ocelot_3/setup.c b/arch/mips/momentum/ocelot_3/setup.c index 435d0787329e..7d74f8c54129 100644 --- a/arch/mips/momentum/ocelot_3/setup.c +++ b/arch/mips/momentum/ocelot_3/setup.c @@ -67,7 +67,6 @@ #include <asm/irq.h> #include <asm/pci.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/mc146818rtc.h> #include <asm/tlbflush.h> diff --git a/arch/mips/momentum/ocelot_c/cpci-irq.c b/arch/mips/momentum/ocelot_c/cpci-irq.c index a5dc230520df..47e3fa32b075 100644 --- a/arch/mips/momentum/ocelot_c/cpci-irq.c +++ b/arch/mips/momentum/ocelot_c/cpci-irq.c @@ -21,7 +21,6 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kernel.h> -#include <asm/ptrace.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <asm/io.h> @@ -112,7 +111,7 @@ static void end_cpci_irq(unsigned int irq) * Interrupt handler for interrupts coming from the FPGA chip. * It could be built in ethernet ports etc... */ -void ll_cpci_irq(struct pt_regs *regs) +void ll_cpci_irq(void) { unsigned int irq_src, irq_mask; @@ -123,7 +122,7 @@ void ll_cpci_irq(struct pt_regs *regs) /* mask for just the interrupts we want */ irq_src &= ~irq_mask; - do_IRQ(ls1bit8(irq_src) + CPCI_IRQ_BASE, regs); + do_IRQ(ls1bit8(irq_src) + CPCI_IRQ_BASE); } #define shutdown_cpci_irq disable_cpci_irq diff --git a/arch/mips/momentum/ocelot_c/irq.c b/arch/mips/momentum/ocelot_c/irq.c index 9d44ae1e156b..ea65223a6d2c 100644 --- a/arch/mips/momentum/ocelot_c/irq.c +++ b/arch/mips/momentum/ocelot_c/irq.c @@ -59,31 +59,31 @@ static struct irqaction cascade_mv64340 = { no_action, IRQF_DISABLED, CPU_MASK_NONE, "cascade via MV64340", NULL, NULL }; -extern void ll_uart_irq(struct pt_regs *regs); -extern void ll_cpci_irq(struct pt_regs *regs); +extern void ll_uart_irq(void); +extern void ll_cpci_irq(void); -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status(); if (pending & STATUSF_IP0) - do_IRQ(0, regs); + do_IRQ(0); else if (pending & STATUSF_IP1) - do_IRQ(1, regs); + do_IRQ(1); else if (pending & STATUSF_IP2) - do_IRQ(2, regs); + do_IRQ(2); else if (pending & STATUSF_IP3) - ll_uart_irq(regs); + ll_uart_irq(); else if (pending & STATUSF_IP4) - do_IRQ(4, regs); + do_IRQ(4); else if (pending & STATUSF_IP5) - ll_cpci_irq(regs); + ll_cpci_irq(); else if (pending & STATUSF_IP6) - ll_mv64340_irq(regs); + ll_mv64340_irq(); else if (pending & STATUSF_IP7) - do_IRQ(7, regs); + do_IRQ(7); else - spurious_interrupt(regs); + spurious_interrupt(); } void __init arch_init_irq(void) diff --git a/arch/mips/momentum/ocelot_c/setup.c b/arch/mips/momentum/ocelot_c/setup.c index 36f570ecc6fb..9c0c462af650 100644 --- a/arch/mips/momentum/ocelot_c/setup.c +++ b/arch/mips/momentum/ocelot_c/setup.c @@ -62,7 +62,6 @@ #include <asm/irq.h> #include <asm/pci.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/marvell.h> #include <linux/bootmem.h> diff --git a/arch/mips/momentum/ocelot_c/uart-irq.c b/arch/mips/momentum/ocelot_c/uart-irq.c index 9f33d8f1d826..510257dc205a 100644 --- a/arch/mips/momentum/ocelot_c/uart-irq.c +++ b/arch/mips/momentum/ocelot_c/uart-irq.c @@ -16,7 +16,6 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kernel.h> -#include <asm/ptrace.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <asm/io.h> @@ -105,7 +104,7 @@ static void end_uart_irq(unsigned int irq) /* * Interrupt handler for interrupts coming from the FPGA chip. */ -void ll_uart_irq(struct pt_regs *regs) +void ll_uart_irq(void) { unsigned int irq_src, irq_mask; @@ -116,7 +115,7 @@ void ll_uart_irq(struct pt_regs *regs) /* mask for just the interrupts we want */ irq_src &= ~irq_mask; - do_IRQ(ls1bit8(irq_src) + 74, regs); + do_IRQ(ls1bit8(irq_src) + 74); } #define shutdown_uart_irq disable_uart_irq diff --git a/arch/mips/momentum/ocelot_g/gt-irq.c b/arch/mips/momentum/ocelot_g/gt-irq.c index 6cd87cf0195a..7b5cc6648f7e 100644 --- a/arch/mips/momentum/ocelot_g/gt-irq.c +++ b/arch/mips/momentum/ocelot_g/gt-irq.c @@ -14,7 +14,6 @@ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/kernel.h> -#include <asm/ptrace.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <asm/gt64240.h> @@ -108,7 +107,7 @@ int disable_galileo_irq(int int_cause, int bit_num) * we keep this particular structure in the function. */ -static irqreturn_t gt64240_p0int_irq(int irq, void *dev, struct pt_regs *regs) +static irqreturn_t gt64240_p0int_irq(int irq, void *dev) { uint32_t irq_src, irq_src_mask; int handled; @@ -135,7 +134,7 @@ static irqreturn_t gt64240_p0int_irq(int irq, void *dev, struct pt_regs *regs) /* handle the timer call */ do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif } diff --git a/arch/mips/momentum/ocelot_g/irq.c b/arch/mips/momentum/ocelot_g/irq.c index 7a4a419804f1..da46524e87cb 100644 --- a/arch/mips/momentum/ocelot_g/irq.c +++ b/arch/mips/momentum/ocelot_g/irq.c @@ -48,22 +48,22 @@ #include <asm/mipsregs.h> #include <asm/system.h> -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status(); if (pending & STATUSF_IP2) - do_IRQ(2, regs); + do_IRQ(2); else if (pending & STATUSF_IP3) - do_IRQ(3, regs); + do_IRQ(3); else if (pending & STATUSF_IP4) - do_IRQ(4, regs); + do_IRQ(4); else if (pending & STATUSF_IP5) - do_IRQ(5, regs); + do_IRQ(5); else if (pending & STATUSF_IP6) - do_IRQ(6, regs); + do_IRQ(6); else if (pending & STATUSF_IP7) - do_IRQ(7, regs); + do_IRQ(7); else { /* * Now look at the extended interrupts @@ -71,15 +71,15 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) pending = (read_c0_cause() & (read_c0_intcontrol() << 8)) >> 16; if (pending & STATUSF_IP8) - do_IRQ(8, regs); + do_IRQ(8); else if (pending & STATUSF_IP9) - do_IRQ(9, regs); + do_IRQ(9); else if (pending & STATUSF_IP10) - do_IRQ(10, regs); + do_IRQ(10); else if (pending & STATUSF_IP11) - do_IRQ(11, regs); + do_IRQ(11); else - spurious_interrupt(regs); + spurious_interrupt(); } } diff --git a/arch/mips/momentum/ocelot_g/setup.c b/arch/mips/momentum/ocelot_g/setup.c index c580b1de33bc..56ec47039c16 100644 --- a/arch/mips/momentum/ocelot_g/setup.c +++ b/arch/mips/momentum/ocelot_g/setup.c @@ -58,7 +58,6 @@ #include <asm/irq.h> #include <asm/pci.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <linux/bootmem.h> diff --git a/arch/mips/oprofile/op_impl.h b/arch/mips/oprofile/op_impl.h index 5cfce7d87a4d..354e54496406 100644 --- a/arch/mips/oprofile/op_impl.h +++ b/arch/mips/oprofile/op_impl.h @@ -12,8 +12,8 @@ struct pt_regs; -extern int null_perf_irq(struct pt_regs *regs); -extern int (*perf_irq)(struct pt_regs *regs); +extern int null_perf_irq(void); +extern int (*perf_irq)(void); /* Per-counter configuration as set via oprofilefs. */ struct op_counter_config { diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index a175d673540f..dd0aec9c3ce1 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -3,12 +3,13 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2004, 2005 by Ralf Baechle + * Copyright (C) 2004, 05, 06 by Ralf Baechle * Copyright (C) 2005 by MIPS Technologies, Inc. */ #include <linux/oprofile.h> #include <linux/interrupt.h> #include <linux/smp.h> +#include <asm/irq_regs.h> #include "op_impl.h" @@ -170,7 +171,7 @@ static void mipsxx_cpu_stop(void *args) } } -static int mipsxx_perfcount_handler(struct pt_regs *regs) +static int mipsxx_perfcount_handler(void) { unsigned int counters = op_model_mipsxx_ops.num_counters; unsigned int control; @@ -184,7 +185,7 @@ static int mipsxx_perfcount_handler(struct pt_regs *regs) counter = r_c0_perfcntr ## n(); \ if ((control & M_PERFCTL_INTERRUPT_ENABLE) && \ (counter & M_COUNTER_OVERFLOW)) { \ - oprofile_add_sample(regs, n); \ + oprofile_add_sample(get_irq_regs(), n); \ w_c0_perfcntr ## n(reg.counter[n]); \ handled = 1; \ } diff --git a/arch/mips/pci/pci-ip32.c b/arch/mips/pci/pci-ip32.c index 17c7932cf0ae..618ea7dbc474 100644 --- a/arch/mips/pci/pci-ip32.c +++ b/arch/mips/pci/pci-ip32.c @@ -22,7 +22,7 @@ * registered on the bridge error irq. It's conceivable that some of these * conditions warrant a panic. Anybody care to say which ones? */ -static irqreturn_t macepci_error(int irq, void *dev, struct pt_regs *regs) +static irqreturn_t macepci_error(int irq, void *dev) { char s; unsigned int flags = mace->pci.error; diff --git a/arch/mips/philips/pnx8550/common/int.c b/arch/mips/philips/pnx8550/common/int.c index 3c93512be1ec..710611615ca2 100644 --- a/arch/mips/philips/pnx8550/common/int.c +++ b/arch/mips/philips/pnx8550/common/int.c @@ -23,6 +23,7 @@ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * */ +#include <linux/compiler.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/sched.h> @@ -52,7 +53,7 @@ static char gic_prio[PNX8550_INT_GIC_TOTINT] = { 1 // 70 }; -static void hw0_irqdispatch(int irq, struct pt_regs *regs) +static void hw0_irqdispatch(int irq) { /* find out which interrupt */ irq = PNX8550_GIC_VECTOR_0 >> 3; @@ -61,42 +62,39 @@ static void hw0_irqdispatch(int irq, struct pt_regs *regs) printk("hw0_irqdispatch: irq 0, spurious interrupt?\n"); return; } - do_IRQ(PNX8550_INT_GIC_MIN + irq, regs); + do_IRQ(PNX8550_INT_GIC_MIN + irq); } -static void timer_irqdispatch(int irq, struct pt_regs *regs) +static void timer_irqdispatch(int irq) { irq = (0x01c0 & read_c0_config7()) >> 6; - if (irq == 0) { + if (unlikely(irq == 0)) { printk("timer_irqdispatch: irq 0, spurious interrupt?\n"); return; } - if (irq & 0x1) { - do_IRQ(PNX8550_INT_TIMER1, regs); - } - if (irq & 0x2) { - do_IRQ(PNX8550_INT_TIMER2, regs); - } - if (irq & 0x4) { - do_IRQ(PNX8550_INT_TIMER3, regs); - } + if (irq & 0x1) + do_IRQ(PNX8550_INT_TIMER1); + if (irq & 0x2) + do_IRQ(PNX8550_INT_TIMER2); + if (irq & 0x4) + do_IRQ(PNX8550_INT_TIMER3); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); if (pending & STATUSF_IP2) - hw0_irqdispatch(2, regs); + hw0_irqdispatch(2); else if (pending & STATUSF_IP7) { if (read_c0_config7() & 0x01c0) - timer_irqdispatch(7, regs); + timer_irqdispatch(7); } - spurious_interrupt(regs); + spurious_interrupt(); } static inline void modify_cp0_intmask(unsigned clr_mask, unsigned set_mask) diff --git a/arch/mips/pmc-sierra/yosemite/irq.c b/arch/mips/pmc-sierra/yosemite/irq.c index b91d0aa3b7ed..adb048527e76 100644 --- a/arch/mips/pmc-sierra/yosemite/irq.c +++ b/arch/mips/pmc-sierra/yosemite/irq.c @@ -56,15 +56,13 @@ #define HYPERTRANSPORT_INTC 0x7a /* INTC# */ #define HYPERTRANSPORT_INTD 0x7b /* INTD# */ -extern void jaguar_mailbox_irq(struct pt_regs *); - /* * Handle hypertransport & SMP interrupts. The interrupt lines are scarce. * For interprocessor interrupts, the best thing to do is to use the INTMSG * register. We use the same external interrupt line, i.e. INTB3 and monitor * another status bit */ -asmlinkage void ll_ht_smp_irq_handler(int irq, struct pt_regs *regs) +static void ll_ht_smp_irq_handler(int irq) { u32 status = OCD_READ(RM9000x2_OCD_INTP0STATUS4); @@ -107,50 +105,35 @@ asmlinkage void ll_ht_smp_irq_handler(int irq, struct pt_regs *regs) } #endif /* CONFIG_HT_LEVEL_TRIGGER */ - do_IRQ(irq, regs); -} - -asmlinkage void do_extended_irq(struct pt_regs *regs) -{ - unsigned int intcontrol = read_c0_intcontrol(); - unsigned int cause = read_c0_cause(); - unsigned int status = read_c0_status(); - unsigned int pending_sr, pending_ic; - - pending_sr = status & cause & 0xff00; - pending_ic = (cause >> 8) & intcontrol & 0xff00; - - if (pending_ic & (1 << 13)) - do_IRQ(13, regs); - + do_IRQ(irq); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int cause = read_c0_cause(); unsigned int status = read_c0_status(); unsigned int pending = cause & status; if (pending & STATUSF_IP7) { - do_IRQ(7, regs); + do_IRQ(7); } else if (pending & STATUSF_IP2) { #ifdef CONFIG_HYPERTRANSPORT - ll_ht_smp_irq_handler(2, regs); + ll_ht_smp_irq_handler(2); #else - do_IRQ(2, regs); + do_IRQ(2); #endif } else if (pending & STATUSF_IP3) { - do_IRQ(3, regs); + do_IRQ(3); } else if (pending & STATUSF_IP4) { - do_IRQ(4, regs); + do_IRQ(4); } else if (pending & STATUSF_IP5) { #ifdef CONFIG_SMP - titan_mailbox_irq(regs); + titan_mailbox_irq(); #else - do_IRQ(5, regs); + do_IRQ(5); #endif } else if (pending & STATUSF_IP6) { - do_IRQ(4, regs); + do_IRQ(4); } } @@ -178,18 +161,3 @@ void __init arch_init_irq(void) register_gdb_console(); #endif } - -#ifdef CONFIG_KGDB -/* - * The 16550 DUART has two ports, but is allocated one IRQ - * for the serial console. Hence, a generic framework for - * serial IRQ routing in place. Currently, just calls the - * do_IRQ fuction. But, going in the future, need to check - * DUART registers for channel A and B, then decide the - * appropriate action - */ -asmlinkage void yosemite_kgdb_irq(int irq, struct pt_regs *regs) -{ - do_IRQ(irq, regs); -} -#endif diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 0a6ee8e5eec2..1b9b0d396d3e 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -46,7 +46,6 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/serial.h> #include <asm/titan_dep.h> diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index c197311e15d3..65fa3a23ea5e 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -110,7 +110,7 @@ void prom_smp_finish(void) { } -asmlinkage void titan_mailbox_irq(struct pt_regs *regs) +asmlinkage void titan_mailbox_irq(void) { int cpu = smp_processor_id(); unsigned long status; diff --git a/arch/mips/qemu/q-irq.c b/arch/mips/qemu/q-irq.c index 3352374c4c7d..f5ea2fe10f14 100644 --- a/arch/mips/qemu/q-irq.c +++ b/arch/mips/qemu/q-irq.c @@ -9,19 +9,19 @@ extern asmlinkage void qemu_handle_int(void); -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); if (pending & 0x8000) { - ll_timer_interrupt(Q_COUNT_COMPARE_IRQ, regs); + ll_timer_interrupt(Q_COUNT_COMPARE_IRQ); return; } if (pending & 0x0400) { int irq = i8259_irq(); if (likely(irq >= 0)) - do_IRQ(irq, regs); + do_IRQ(irq); return; } diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index a28dc7800072..de6a0cc32fea 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -12,6 +12,7 @@ #include <asm/system.h> #include <asm/traps.h> #include <asm/branch.h> +#include <asm/irq_regs.h> #include <asm/sgi/mc.h> #include <asm/sgi/hpc3.h> #include <asm/sgi/ioc.h> @@ -85,9 +86,10 @@ static void print_buserr(void) * and then clear the interrupt when this happens. */ -void ip22_be_interrupt(int irq, struct pt_regs *regs) +void ip22_be_interrupt(int irq) { const int field = 2 * sizeof(unsigned long); + const struct pt_regs *regs = get_irq_regs(); save_and_clear_buserr(); print_buserr(); diff --git a/arch/mips/sgi-ip22/ip22-eisa.c b/arch/mips/sgi-ip22/ip22-eisa.c index ee0514a29922..0d18ed47c47a 100644 --- a/arch/mips/sgi-ip22/ip22-eisa.c +++ b/arch/mips/sgi-ip22/ip22-eisa.c @@ -70,7 +70,7 @@ static char __init *decode_eisa_sig(unsigned long addr) return sig_str; } -static irqreturn_t ip22_eisa_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ip22_eisa_intr(int irq, void *dev_id) { u8 eisa_irq; u8 dma1, dma2; @@ -80,7 +80,7 @@ static irqreturn_t ip22_eisa_intr(int irq, void *dev_id, struct pt_regs *regs) dma2 = inb(EISA_DMA2_STATUS); if (eisa_irq < EISA_MAX_IRQ) { - do_IRQ(eisa_irq, regs); + do_IRQ(eisa_irq); return IRQ_HANDLED; } @@ -89,6 +89,7 @@ static irqreturn_t ip22_eisa_intr(int irq, void *dev_id, struct pt_regs *regs) outb(0x20, EISA_INT2_CTRL); outb(0x20, EISA_INT1_CTRL); + return IRQ_NONE; } diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c index f66026e5d64b..af518898eaa1 100644 --- a/arch/mips/sgi-ip22/ip22-int.c +++ b/arch/mips/sgi-ip22/ip22-int.c @@ -222,7 +222,7 @@ static struct irq_chip ip22_local3_irq_type = { .end = end_local3_irq, }; -static void indy_local0_irqdispatch(struct pt_regs *regs) +static void indy_local0_irqdispatch(void) { u8 mask = sgint->istat0 & sgint->imask0; u8 mask2; @@ -236,11 +236,10 @@ static void indy_local0_irqdispatch(struct pt_regs *regs) /* if irq == 0, then the interrupt has already been cleared */ if (irq) - do_IRQ(irq, regs); - return; + do_IRQ(irq); } -static void indy_local1_irqdispatch(struct pt_regs *regs) +static void indy_local1_irqdispatch(void) { u8 mask = sgint->istat1 & sgint->imask1; u8 mask2; @@ -254,19 +253,18 @@ static void indy_local1_irqdispatch(struct pt_regs *regs) /* if irq == 0, then the interrupt has already been cleared */ if (irq) - do_IRQ(irq, regs); - return; + do_IRQ(irq); } -extern void ip22_be_interrupt(int irq, struct pt_regs *regs); +extern void ip22_be_interrupt(int irq); -static void indy_buserror_irq(struct pt_regs *regs) +static void indy_buserror_irq(void) { int irq = SGI_BUSERR_IRQ; irq_enter(); kstat_this_cpu.irqs[irq]++; - ip22_be_interrupt(irq, regs); + ip22_be_interrupt(irq); irq_exit(); } @@ -305,8 +303,8 @@ static struct irqaction map1_cascade = { #define SGI_INTERRUPTS SGINT_LOCAL3 #endif -extern void indy_r4k_timer_interrupt(struct pt_regs *regs); -extern void indy_8254timer_irq(struct pt_regs *regs); +extern void indy_r4k_timer_interrupt(void); +extern void indy_8254timer_irq(void); /* * IRQs on the INDY look basically (barring software IRQs which we don't use @@ -336,7 +334,7 @@ extern void indy_8254timer_irq(struct pt_regs *regs); * another exception, big deal. */ -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause(); @@ -344,15 +342,15 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) * First we check for r4k counter/timer IRQ. */ if (pending & CAUSEF_IP7) - indy_r4k_timer_interrupt(regs); + indy_r4k_timer_interrupt(); else if (pending & CAUSEF_IP2) - indy_local0_irqdispatch(regs); + indy_local0_irqdispatch(); else if (pending & CAUSEF_IP3) - indy_local1_irqdispatch(regs); + indy_local1_irqdispatch(); else if (pending & CAUSEF_IP6) - indy_buserror_irq(regs); + indy_buserror_irq(); else if (pending & (CAUSEF_IP4 | CAUSEF_IP5)) - indy_8254timer_irq(regs); + indy_8254timer_irq(); } extern void mips_cpu_irq_init(unsigned int irq_base); diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c index 3462b0d98def..205554734099 100644 --- a/arch/mips/sgi-ip22/ip22-time.c +++ b/arch/mips/sgi-ip22/ip22-time.c @@ -175,7 +175,7 @@ static __init void indy_time_init(void) } /* Generic SGI handler for (spurious) 8254 interrupts */ -void indy_8254timer_irq(struct pt_regs *regs) +void indy_8254timer_irq(void) { int irq = SGI_8254_0_IRQ; ULONG cnt; @@ -189,16 +189,14 @@ void indy_8254timer_irq(struct pt_regs *regs) irq_exit(); } -void indy_r4k_timer_interrupt(struct pt_regs *regs) +void indy_r4k_timer_interrupt(void) { - struct pt_regs *old_regs = set_irq_regs(regs); int irq = SGI_TIMER_IRQ; irq_enter(); kstat_this_cpu.irqs[irq]++; timer_interrupt(irq, NULL); irq_exit(); - set_irq_regs(old_regs); } void __init plat_timer_setup(struct irqaction *irq) diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 24a85372284f..f01ba1f90770 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -30,7 +30,6 @@ #include <asm/mipsregs.h> #include <asm/system.h> -#include <asm/ptrace.h> #include <asm/processor.h> #include <asm/pci/bridge.h> #include <asm/sn/addrs.h> @@ -129,7 +128,7 @@ static int ms1bit(unsigned long x) * Kanoj 05.13.00 */ -static void ip27_do_irq_mask0(struct pt_regs *regs) +static void ip27_do_irq_mask0(void) { int irq, swlevel; hubreg_t pend0, mask0; @@ -164,13 +163,13 @@ static void ip27_do_irq_mask0(struct pt_regs *regs) struct slice_data *si = cpu_data[cpu].data; irq = si->level_to_irq[swlevel]; - do_IRQ(irq, regs); + do_IRQ(irq); } LOCAL_HUB_L(PI_INT_PEND0); } -static void ip27_do_irq_mask1(struct pt_regs *regs) +static void ip27_do_irq_mask1(void) { int irq, swlevel; hubreg_t pend1, mask1; @@ -190,17 +189,17 @@ static void ip27_do_irq_mask1(struct pt_regs *regs) /* "map" swlevel to irq */ irq = si->level_to_irq[swlevel]; LOCAL_HUB_CLR_INTR(swlevel); - do_IRQ(irq, regs); + do_IRQ(irq); LOCAL_HUB_L(PI_INT_PEND1); } -static void ip27_prof_timer(struct pt_regs *regs) +static void ip27_prof_timer(void) { panic("CPU %d got a profiling interrupt", smp_processor_id()); } -static void ip27_hub_error(struct pt_regs *regs) +static void ip27_hub_error(void) { panic("CPU %d got a hub error interrupt", smp_processor_id()); } @@ -418,22 +417,22 @@ int __devinit request_bridge_irq(struct bridge_controller *bc) return irq; } -extern void ip27_rt_timer_interrupt(struct pt_regs *regs); +extern void ip27_rt_timer_interrupt(void); -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned long pending = read_c0_cause() & read_c0_status(); if (pending & CAUSEF_IP4) - ip27_rt_timer_interrupt(regs); + ip27_rt_timer_interrupt(); else if (pending & CAUSEF_IP2) /* PI_INT_PEND_0 or CC_PEND_{A|B} */ - ip27_do_irq_mask0(regs); + ip27_do_irq_mask0(); else if (pending & CAUSEF_IP3) /* PI_INT_PEND_1 */ - ip27_do_irq_mask1(regs); + ip27_do_irq_mask1(); else if (pending & CAUSEF_IP5) - ip27_prof_timer(regs); + ip27_prof_timer(); else if (pending & CAUSEF_IP6) - ip27_hub_error(regs); + ip27_hub_error(); } void __init arch_init_irq(void) diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c index d777b7d1a9fe..f9f404a8ddad 100644 --- a/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/arch/mips/sgi-ip27/ip27-klnuma.c @@ -26,7 +26,7 @@ static cpumask_t ktext_repmask; * kernel. For example, we should never put a copy on a headless node, * and we should respect the topology of the machine. */ -void __init setup_replication_mask() +void __init setup_replication_mask(void) { cnodeid_t cnode; diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 257ce118e380..4e870fc4469b 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -89,7 +89,7 @@ static int set_rtc_mmss(unsigned long nowtime) static unsigned int rt_timer_irq; -void ip27_rt_timer_interrupt(struct pt_regs *regs) +void ip27_rt_timer_interrupt(void) { int cpu = smp_processor_id(); int cpuA = cputoslice(cpu) == 0; @@ -111,7 +111,7 @@ again: if (cpu == 0) do_timer(1); - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); /* * If we have an externally synchronized Linux clock, then update diff --git a/arch/mips/sgi-ip32/crime.c b/arch/mips/sgi-ip32/crime.c index 41b5eca1148c..bff508704d03 100644 --- a/arch/mips/sgi-ip32/crime.c +++ b/arch/mips/sgi-ip32/crime.c @@ -14,7 +14,6 @@ #include <asm/bootinfo.h> #include <asm/io.h> #include <asm/mipsregs.h> -#include <asm/ptrace.h> #include <asm/page.h> #include <asm/ip32/crime.h> #include <asm/ip32/mace.h> @@ -40,8 +39,7 @@ void __init crime_init(void) id, rev, field, (unsigned long) CRIME_BASE); } -irqreturn_t -crime_memerr_intr (unsigned int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id) { unsigned long stat, addr; int fatal = 0; @@ -92,8 +90,7 @@ crime_memerr_intr (unsigned int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -irqreturn_t -crime_cpuerr_intr (unsigned int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t crime_cpuerr_intr(unsigned int irq, void *dev_id) { unsigned long stat = crime->cpu_error_stat & CRIME_CPU_ERROR_MASK; unsigned long addr = crime->cpu_error_addr & CRIME_CPU_ERROR_ADDR_MASK; diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c index c64a820373de..c9acadd0846b 100644 --- a/arch/mips/sgi-ip32/ip32-irq.c +++ b/arch/mips/sgi-ip32/ip32-irq.c @@ -120,10 +120,8 @@ static void inline flush_mace_bus(void) static DEFINE_SPINLOCK(ip32_irq_lock); /* Some initial interrupts to set up */ -extern irqreturn_t crime_memerr_intr (int irq, void *dev_id, - struct pt_regs *regs); -extern irqreturn_t crime_cpuerr_intr (int irq, void *dev_id, - struct pt_regs *regs); +extern irqreturn_t crime_memerr_intr(int irq, void *dev_id); +extern irqreturn_t crime_cpuerr_intr(int irq, void *dev_id); struct irqaction memerr_irq = { crime_memerr_intr, IRQF_DISABLED, CPU_MASK_NONE, "CRIME memory error", NULL, NULL }; @@ -479,7 +477,7 @@ static struct irq_chip ip32_mace_interrupt = { .end = end_mace_irq, }; -static void ip32_unknown_interrupt(struct pt_regs *regs) +static void ip32_unknown_interrupt(void) { printk ("Unknown interrupt occurred!\n"); printk ("cp0_status: %08x\n", read_c0_status()); @@ -492,7 +490,7 @@ static void ip32_unknown_interrupt(struct pt_regs *regs) printk ("MACE PCI control register: %08x\n", mace->pci.control); printk("Register dump:\n"); - show_regs(regs); + show_regs(get_irq_regs()); printk("Please mail this report to linux-mips@linux-mips.org\n"); printk("Spinning..."); @@ -501,7 +499,7 @@ static void ip32_unknown_interrupt(struct pt_regs *regs) /* CRIME 1.1 appears to deliver all interrupts to this one pin. */ /* change this to loop over all edge-triggered irqs, exception masked out ones */ -static void ip32_irq0(struct pt_regs *regs) +static void ip32_irq0(void) { uint64_t crime_int; int irq = 0; @@ -516,50 +514,50 @@ static void ip32_irq0(struct pt_regs *regs) } irq++; DBG("*irq %u*\n", irq); - do_IRQ(irq, regs); + do_IRQ(irq); } -static void ip32_irq1(struct pt_regs *regs) +static void ip32_irq1(void) { - ip32_unknown_interrupt(regs); + ip32_unknown_interrupt(); } -static void ip32_irq2(struct pt_regs *regs) +static void ip32_irq2(void) { - ip32_unknown_interrupt(regs); + ip32_unknown_interrupt(); } -static void ip32_irq3(struct pt_regs *regs) +static void ip32_irq3(void) { - ip32_unknown_interrupt(regs); + ip32_unknown_interrupt(); } -static void ip32_irq4(struct pt_regs *regs) +static void ip32_irq4(void) { - ip32_unknown_interrupt(regs); + ip32_unknown_interrupt(); } -static void ip32_irq5(struct pt_regs *regs) +static void ip32_irq5(void) { - ll_timer_interrupt(IP32_R4K_TIMER_IRQ, regs); + ll_timer_interrupt(IP32_R4K_TIMER_IRQ); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause(); if (likely(pending & IE_IRQ0)) - ip32_irq0(regs); + ip32_irq0(); else if (unlikely(pending & IE_IRQ1)) - ip32_irq1(regs); + ip32_irq1(); else if (unlikely(pending & IE_IRQ2)) - ip32_irq2(regs); + ip32_irq2(); else if (unlikely(pending & IE_IRQ3)) - ip32_irq3(regs); + ip32_irq3(); else if (unlikely(pending & IE_IRQ4)) - ip32_irq4(regs); + ip32_irq4(); else if (likely(pending & IE_IRQ5)) - ip32_irq5(regs); + ip32_irq5(); } void __init arch_init_irq(void) diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index a46b75b23ecb..8b1f41484923 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -25,9 +25,9 @@ #include <linux/kernel_stat.h> #include <asm/errno.h> +#include <asm/irq_regs.h> #include <asm/signal.h> #include <asm/system.h> -#include <asm/ptrace.h> #include <asm/io.h> #include <asm/sibyte/bcm1480_regs.h> @@ -284,8 +284,7 @@ void __init init_bcm1480_irqs(void) } -static irqreturn_t bcm1480_dummy_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t bcm1480_dummy_handler(int irq, void *dev_id) { return IRQ_NONE; } @@ -453,7 +452,7 @@ void __init arch_init_irq(void) #define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) #define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) -void bcm1480_kgdb_interrupt(struct pt_regs *regs) +static void bcm1480_kgdb_interrupt(void) { /* * Clear break-change status (allow some time for the remote @@ -464,16 +463,15 @@ void bcm1480_kgdb_interrupt(struct pt_regs *regs) mdelay(500); duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | M_DUART_RX_EN | M_DUART_TX_EN); - set_async_breakpoint(®s->cp0_epc); + set_async_breakpoint(&get_irq_regs()->cp0_epc); } #endif /* CONFIG_KGDB */ -extern void bcm1480_timer_interrupt(struct pt_regs *regs); -extern void bcm1480_mailbox_interrupt(struct pt_regs *regs); -extern void bcm1480_kgdb_interrupt(struct pt_regs *regs); +extern void bcm1480_timer_interrupt(void); +extern void bcm1480_mailbox_interrupt(void); -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending; @@ -486,21 +484,21 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) #ifdef CONFIG_SIBYTE_BCM1480_PROF if (pending & CAUSEF_IP7) /* Cpu performance counter interrupt */ - sbprof_cpu_intr(exception_epc(regs)); + sbprof_cpu_intr(); else #endif if (pending & CAUSEF_IP4) - bcm1480_timer_interrupt(regs); + bcm1480_timer_interrupt(); #ifdef CONFIG_SMP else if (pending & CAUSEF_IP3) - bcm1480_mailbox_interrupt(regs); + bcm1480_mailbox_interrupt(); #endif #ifdef CONFIG_KGDB else if (pending & CAUSEF_IP6) - bcm1480_kgdb_interrupt(regs); /* KGDB (uart 1) */ + bcm1480_kgdb_interrupt(); /* KGDB (uart 1) */ #endif else if (pending & CAUSEF_IP2) { @@ -521,9 +519,9 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) if (mask_h) { if (mask_h ^ 1) - do_IRQ(fls64(mask_h) - 1, regs); + do_IRQ(fls64(mask_h) - 1); else - do_IRQ(63 + fls64(mask_l), regs); + do_IRQ(63 + fls64(mask_l)); } } } diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 584a4b33faac..bf328277c775 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -34,21 +34,21 @@ extern void smp_call_function_interrupt(void); * independent of board/firmware */ -static void *mailbox_0_set_regs[] = { +static volatile void *mailbox_0_set_regs[] = { IOADDR(A_BCM1480_IMR_CPU0_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), IOADDR(A_BCM1480_IMR_CPU1_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), IOADDR(A_BCM1480_IMR_CPU2_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), IOADDR(A_BCM1480_IMR_CPU3_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), }; -static void *mailbox_0_clear_regs[] = { +static volatile void *mailbox_0_clear_regs[] = { IOADDR(A_BCM1480_IMR_CPU0_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), IOADDR(A_BCM1480_IMR_CPU1_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), IOADDR(A_BCM1480_IMR_CPU2_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), IOADDR(A_BCM1480_IMR_CPU3_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), }; -static void *mailbox_0_regs[] = { +static volatile void *mailbox_0_regs[] = { IOADDR(A_BCM1480_IMR_CPU0_BASE + R_BCM1480_IMR_MAILBOX_0_CPU), IOADDR(A_BCM1480_IMR_CPU1_BASE + R_BCM1480_IMR_MAILBOX_0_CPU), IOADDR(A_BCM1480_IMR_CPU2_BASE + R_BCM1480_IMR_MAILBOX_0_CPU), @@ -88,7 +88,7 @@ void core_send_ipi(int cpu, unsigned int action) __raw_writeq((((u64)action)<< 48), mailbox_0_set_regs[cpu]); } -void bcm1480_mailbox_interrupt(struct pt_regs *regs) +void bcm1480_mailbox_interrupt(void) { int cpu = smp_processor_id(); unsigned int action; diff --git a/arch/mips/sibyte/bcm1480/time.c b/arch/mips/sibyte/bcm1480/time.c index 7e088f6c4a86..bf12af46132e 100644 --- a/arch/mips/sibyte/bcm1480/time.c +++ b/arch/mips/sibyte/bcm1480/time.c @@ -31,7 +31,6 @@ #include <linux/kernel_stat.h> #include <asm/irq.h> -#include <asm/ptrace.h> #include <asm/addrspace.h> #include <asm/time.h> #include <asm/io.h> @@ -100,10 +99,10 @@ void bcm1480_time_init(void) #include <asm/sibyte/sb1250.h> -void bcm1480_timer_interrupt(struct pt_regs *regs) +void bcm1480_timer_interrupt(void) { int cpu = smp_processor_id(); - int irq = K_BCM1480_INT_TIMER_0+cpu; + int irq = K_BCM1480_INT_TIMER_0 + cpu; /* Reset the timer */ __raw_writeq(M_SCD_TIMER_ENABLE|M_SCD_TIMER_MODE_CONTINUOUS, @@ -113,13 +112,13 @@ void bcm1480_timer_interrupt(struct pt_regs *regs) /* * CPU 0 handles the global timer interrupt job */ - ll_timer_interrupt(irq, regs); + ll_timer_interrupt(irq); } else { /* * other CPUs should just do profiling and process accounting */ - ll_local_timer_interrupt(irq, regs); + ll_local_timer_interrupt(irq); } } diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index f9bd9f074517..d5d26770daf6 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -28,7 +28,6 @@ #include <asm/errno.h> #include <asm/signal.h> #include <asm/system.h> -#include <asm/ptrace.h> #include <asm/io.h> #include <asm/sibyte/sb1250_regs.h> @@ -254,8 +253,7 @@ void __init init_sb1250_irqs(void) } -static irqreturn_t sb1250_dummy_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t sb1250_dummy_handler(int irq, void *dev_id) { return IRQ_NONE; } @@ -403,7 +401,7 @@ void __init arch_init_irq(void) #define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) #define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) -static void sb1250_kgdb_interrupt(struct pt_regs *regs) +static void sb1250_kgdb_interrupt(void) { /* * Clear break-change status (allow some time for the remote @@ -414,16 +412,15 @@ static void sb1250_kgdb_interrupt(struct pt_regs *regs) mdelay(500); duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | M_DUART_RX_EN | M_DUART_TX_EN); - set_async_breakpoint(®s->cp0_epc); + set_async_breakpoint(&get_irq_regs()->cp0_epc); } #endif /* CONFIG_KGDB */ -extern void sb1250_timer_interrupt(struct pt_regs *regs); -extern void sb1250_mailbox_interrupt(struct pt_regs *regs); -extern void sb1250_kgdb_interrupt(struct pt_regs *regs); +extern void sb1250_timer_interrupt(void); +extern void sb1250_mailbox_interrupt(void); -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending; @@ -446,21 +443,21 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) #ifdef CONFIG_SIBYTE_SB1250_PROF if (pending & CAUSEF_IP7) /* Cpu performance counter interrupt */ - sbprof_cpu_intr(exception_epc(regs)); + sbprof_cpu_intr(); else #endif if (pending & CAUSEF_IP4) - sb1250_timer_interrupt(regs); + sb1250_timer_interrupt(); #ifdef CONFIG_SMP else if (pending & CAUSEF_IP3) - sb1250_mailbox_interrupt(regs); + sb1250_mailbox_interrupt(); #endif #ifdef CONFIG_KGDB else if (pending & CAUSEF_IP6) /* KGDB (uart 1) */ - sb1250_kgdb_interrupt(regs); + sb1250_kgdb_interrupt(); #endif else if (pending & CAUSEF_IP2) { @@ -475,9 +472,9 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) mask = __raw_readq(IOADDR(A_IMR_REGISTER(smp_processor_id(), R_IMR_INTERRUPT_STATUS_BASE))); if (mask) - do_IRQ(fls64(mask) - 1, regs); + do_IRQ(fls64(mask) - 1); else - spurious_interrupt(regs); + spurious_interrupt(); } else - spurious_interrupt(regs); + spurious_interrupt(); } diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index f859db02d3c9..c38e1f34460d 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -76,7 +76,7 @@ void core_send_ipi(int cpu, unsigned int action) __raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]); } -void sb1250_mailbox_interrupt(struct pt_regs *regs) +void sb1250_mailbox_interrupt(void) { int cpu = smp_processor_id(); unsigned int action; diff --git a/arch/mips/sibyte/sb1250/time.c b/arch/mips/sibyte/sb1250/time.c index 4b669dc86ef4..0ccf1796dd78 100644 --- a/arch/mips/sibyte/sb1250/time.c +++ b/arch/mips/sibyte/sb1250/time.c @@ -31,7 +31,6 @@ #include <linux/kernel_stat.h> #include <asm/irq.h> -#include <asm/ptrace.h> #include <asm/addrspace.h> #include <asm/time.h> #include <asm/io.h> @@ -125,7 +124,7 @@ void sb1250_time_init(void) */ } -void sb1250_timer_interrupt(struct pt_regs *regs) +void sb1250_timer_interrupt(void) { int cpu = smp_processor_id(); int irq = K_INT_TIMER_0 + cpu; @@ -138,13 +137,13 @@ void sb1250_timer_interrupt(struct pt_regs *regs) /* * CPU 0 handles the global timer interrupt job */ - ll_timer_interrupt(irq, regs); + ll_timer_interrupt(irq); } else { /* * other CPUs should just do profiling and process accounting */ - ll_local_timer_interrupt(irq, regs); + ll_local_timer_interrupt(irq); } } diff --git a/arch/mips/sni/irq.c b/arch/mips/sni/irq.c index cda165f42b6a..48fb74a7aaec 100644 --- a/arch/mips/sni/irq.c +++ b/arch/mips/sni/irq.c @@ -69,20 +69,20 @@ static struct irq_chip pciasic_irq_type = { * hwint0 should deal with MP agent, ASIC PCI, EISA NMI and debug * button interrupts. Later ... */ -static void pciasic_hwint0(struct pt_regs *regs) +static void pciasic_hwint0(void) { panic("Received int0 but no handler yet ..."); } /* This interrupt was used for the com1 console on the first prototypes. */ -static void pciasic_hwint2(struct pt_regs *regs) +static void pciasic_hwint2(void) { /* I think this shouldn't happen on production machines. */ panic("hwint2 and no handler yet"); } /* hwint5 is the r4k count / compare interrupt */ -static void pciasic_hwint5(struct pt_regs *regs) +static void pciasic_hwint5(void) { panic("hwint5 and no handler yet"); } @@ -103,7 +103,7 @@ static unsigned int ls1bit8(unsigned int x) * * The EISA_INT bit in CSITPEND is high active, all others are low active. */ -static void pciasic_hwint1(struct pt_regs *regs) +static void pciasic_hwint1(void) { u8 pend = *(volatile char *)PCIMT_CSITPEND; unsigned long flags; @@ -119,13 +119,13 @@ static void pciasic_hwint1(struct pt_regs *regs) if (unlikely(irq < 0)) return; - do_IRQ(irq, regs); + do_IRQ(irq); } if (!(pend & IT_SCSI)) { flags = read_c0_status(); clear_c0_status(ST0_IM); - do_IRQ(PCIMT_IRQ_SCSI, regs); + do_IRQ(PCIMT_IRQ_SCSI); write_c0_status(flags); } } @@ -133,7 +133,7 @@ static void pciasic_hwint1(struct pt_regs *regs) /* * hwint 3 should deal with the PCI A - D interrupts, */ -static void pciasic_hwint3(struct pt_regs *regs) +static void pciasic_hwint3(void) { u8 pend = *(volatile char *)PCIMT_CSITPEND; int irq; @@ -141,21 +141,21 @@ static void pciasic_hwint3(struct pt_regs *regs) pend &= (IT_INTA | IT_INTB | IT_INTC | IT_INTD); clear_c0_status(IE_IRQ3); irq = PCIMT_IRQ_INT2 + ls1bit8(pend); - do_IRQ(irq, regs); + do_IRQ(irq); set_c0_status(IE_IRQ3); } /* * hwint 4 is used for only the onboard PCnet 32. */ -static void pciasic_hwint4(struct pt_regs *regs) +static void pciasic_hwint4(void) { clear_c0_status(IE_IRQ4); - do_IRQ(PCIMT_IRQ_ETHERNET, regs); + do_IRQ(PCIMT_IRQ_ETHERNET); set_c0_status(IE_IRQ4); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); static unsigned char led_cache; @@ -163,17 +163,17 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs) *(volatile unsigned char *) PCIMT_CSLED = ++led_cache; if (pending & 0x0800) - pciasic_hwint1(regs); + pciasic_hwint1(); else if (pending & 0x4000) - pciasic_hwint4(regs); + pciasic_hwint4(); else if (pending & 0x2000) - pciasic_hwint3(regs); + pciasic_hwint3(); else if (pending & 0x1000) - pciasic_hwint2(regs); + pciasic_hwint2(); else if (pending & 0x8000) - pciasic_hwint5(regs); + pciasic_hwint5(); else if (pending & 0x0400) - pciasic_hwint0(regs); + pciasic_hwint0(); } void __init init_pciasic(void) diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index 4e98feb15410..afeb7f13e5b5 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -31,7 +31,6 @@ #include <asm/irq.h> #include <asm/mc146818-time.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/sni.h> #include <asm/time.h> diff --git a/arch/mips/tx4927/common/tx4927_irq.c b/arch/mips/tx4927/common/tx4927_irq.c index cd176f6a06c8..8266a88a3f88 100644 --- a/arch/mips/tx4927/common/tx4927_irq.c +++ b/arch/mips/tx4927/common/tx4927_irq.c @@ -576,24 +576,24 @@ static int tx4927_irq_nested(void) return (sw_irq); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause(); if (pending & STATUSF_IP7) /* cpu timer */ - do_IRQ(TX4927_IRQ_CPU_TIMER, regs); + do_IRQ(TX4927_IRQ_CPU_TIMER); else if (pending & STATUSF_IP2) { /* tx4927 pic */ unsigned int irq = tx4927_irq_nested(); if (unlikely(irq == 0)) { - spurious_interrupt(regs); + spurious_interrupt(); return; } - do_IRQ(irq, regs); + do_IRQ(irq); } else if (pending & STATUSF_IP0) /* user line 0 */ - do_IRQ(TX4927_IRQ_USER0, regs); + do_IRQ(TX4927_IRQ_USER0); else if (pending & STATUSF_IP1) /* user line 1 */ - do_IRQ(TX4927_IRQ_USER1, regs); + do_IRQ(TX4927_IRQ_USER1); else - spurious_interrupt(regs); + spurious_interrupt(); } diff --git a/arch/mips/tx4927/common/tx4927_setup.c b/arch/mips/tx4927/common/tx4927_setup.c index 3ace4037343e..4658b2ae4833 100644 --- a/arch/mips/tx4927/common/tx4927_setup.c +++ b/arch/mips/tx4927/common/tx4927_setup.c @@ -53,19 +53,9 @@ void __init tx4927_time_init(void); void dump_cp0(char *key); -void (*__wbflush) (void); - -static void tx4927_write_buffer_flush(void) -{ - __asm__ __volatile__ - ("sync\n\t" "nop\n\t" "loop: bc0f loop\n\t" "nop\n\t"); -} - - void __init plat_mem_setup(void) { board_time_init = tx4927_time_init; - __wbflush = tx4927_write_buffer_flush; #ifdef CONFIG_TOSHIBA_RBTX4927 { diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c index b0f021f2a6c4..0c3c3f668230 100644 --- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c +++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c @@ -127,9 +127,9 @@ JP7 is not bus master -- do NOT use -- only 4 pci bus master's allowed -- SouthB #include <asm/irq.h> #include <asm/pci.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/time.h> +#include <asm/wbflush.h> #include <linux/bootmem.h> #include <linux/blkdev.h> #ifdef CONFIG_RTC_DS1742 diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c index f0d70c476005..735cb8778f4c 100644 --- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c +++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c @@ -58,8 +58,8 @@ #include <asm/page.h> #include <asm/io.h> #include <asm/irq.h> +#include <asm/irq_regs.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/time.h> #include <linux/bootmem.h> @@ -160,8 +160,7 @@ int tx4927_pci66 = 0; /* 0:auto */ char *toshiba_name = ""; #ifdef CONFIG_PCI -static void tx4927_pcierr_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static void tx4927_pcierr_interrupt(int irq, void *dev_id) { #ifdef CONFIG_BLK_DEV_IDEPCI /* ignore MasterAbort for ide probing... */ @@ -185,7 +184,7 @@ static void tx4927_pcierr_interrupt(int irq, void *dev_id, (unsigned long) tx4927_ccfgptr->ccfg, (unsigned long) (tx4927_ccfgptr->tear >> 32), (unsigned long) tx4927_ccfgptr->tear); - show_regs(regs); + show_regs(get_irq_regs()); } void __init toshiba_rbtx4927_pci_irq_init(void) diff --git a/arch/mips/tx4938/common/irq.c b/arch/mips/tx4938/common/irq.c index cbfb34221b59..77fe2454f5b9 100644 --- a/arch/mips/tx4938/common/irq.c +++ b/arch/mips/tx4938/common/irq.c @@ -30,6 +30,7 @@ #include <asm/irq.h> #include <asm/mipsregs.h> #include <asm/system.h> +#include <asm/wbflush.h> #include <asm/tx4938/rbtx4938.h> /**********************************************************************************/ @@ -104,8 +105,6 @@ tx4938_irq_cp0_init(void) irq_desc[i].depth = 1; irq_desc[i].chip = &tx4938_irq_cp0_type; } - - return; } static unsigned int @@ -113,7 +112,7 @@ tx4938_irq_cp0_startup(unsigned int irq) { tx4938_irq_cp0_enable(irq); - return (0); + return 0; } static void @@ -144,16 +143,12 @@ tx4938_irq_cp0_disable(unsigned int irq) clear_c0_status(tx4938_irq_cp0_mask(irq)); spin_unlock_irqrestore(&tx4938_cp0_lock, flags); - - return; } static void tx4938_irq_cp0_mask_and_ack(unsigned int irq) { tx4938_irq_cp0_disable(irq); - - return; } static void @@ -162,8 +157,6 @@ tx4938_irq_cp0_end(unsigned int irq) if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) { tx4938_irq_cp0_enable(irq); } - - return; } /**********************************************************************************/ @@ -227,7 +220,7 @@ tx4938_irq_pic_addr(int irq) } } - return (0); + return 0; } u32 @@ -278,7 +271,7 @@ tx4938_irq_pic_mask(int irq) return (0x00000007); } } - return (0x00000000); + return 0x00000000; } static void @@ -292,8 +285,6 @@ tx4938_irq_pic_modify(unsigned pic_reg, unsigned clr_bits, unsigned set_bits) TX4938_WR(pic_reg, val); mmiowb(); TX4938_RD(pic_reg); - - return; } static void __init @@ -317,8 +308,6 @@ tx4938_irq_pic_init(void) TX4938_WR(0xff1ff600, TX4938_RD(0xff1ff600) | 0x1); /* irq enable */ spin_unlock_irqrestore(&tx4938_pic_lock, flags); - - return; } static unsigned int @@ -326,15 +315,13 @@ tx4938_irq_pic_startup(unsigned int irq) { tx4938_irq_pic_enable(irq); - return (0); + return 0; } static void tx4938_irq_pic_shutdown(unsigned int irq) { tx4938_irq_pic_disable(irq); - - return; } static void @@ -348,8 +335,6 @@ tx4938_irq_pic_enable(unsigned int irq) tx4938_irq_pic_mask(irq)); spin_unlock_irqrestore(&tx4938_pic_lock, flags); - - return; } static void @@ -363,16 +348,12 @@ tx4938_irq_pic_disable(unsigned int irq) tx4938_irq_pic_mask(irq), 0); spin_unlock_irqrestore(&tx4938_pic_lock, flags); - - return; } static void tx4938_irq_pic_mask_and_ack(unsigned int irq) { tx4938_irq_pic_disable(irq); - - return; } static void @@ -381,8 +362,6 @@ tx4938_irq_pic_end(unsigned int irq) if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) { tx4938_irq_pic_enable(irq); } - - return; } /**********************************************************************************/ @@ -394,8 +373,6 @@ tx4938_irq_init(void) { tx4938_irq_cp0_init(); tx4938_irq_pic_init(); - - return; } int @@ -417,23 +394,23 @@ tx4938_irq_nested(void) } wbflush(); - return (sw_irq); + return sw_irq; } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status(); if (pending & STATUSF_IP7) - do_IRQ(TX4938_IRQ_CPU_TIMER, regs); + do_IRQ(TX4938_IRQ_CPU_TIMER); else if (pending & STATUSF_IP2) { int irq = tx4938_irq_nested(); if (irq) - do_IRQ(irq, regs); + do_IRQ(irq); else - spurious_interrupt(regs); + spurious_interrupt(); } else if (pending & STATUSF_IP1) - do_IRQ(TX4938_IRQ_USER1, regs); + do_IRQ(TX4938_IRQ_USER1); else if (pending & STATUSF_IP0) - do_IRQ(TX4938_IRQ_USER0, regs); + do_IRQ(TX4938_IRQ_USER0); } diff --git a/arch/mips/tx4938/common/setup.c b/arch/mips/tx4938/common/setup.c index 71859c4fee84..f415a1f18fba 100644 --- a/arch/mips/tx4938/common/setup.c +++ b/arch/mips/tx4938/common/setup.c @@ -41,29 +41,10 @@ void __init tx4938_setup(void); void __init tx4938_time_init(void); void dump_cp0(char *key); -void (*__wbflush) (void); - -static void -tx4938_write_buffer_flush(void) -{ - mmiowb(); - - __asm__ __volatile__( - ".set push\n\t" - ".set noreorder\n\t" - "lw $0,%0\n\t" - "nop\n\t" - ".set pop" - : /* no output */ - : "m" (*(int *)KSEG1) - : "memory"); -} - void __init plat_mem_setup(void) { board_time_init = tx4938_time_init; - __wbflush = tx4938_write_buffer_flush; toshiba_rbtx4938_setup(); } diff --git a/arch/mips/tx4938/toshiba_rbtx4938/irq.c b/arch/mips/tx4938/toshiba_rbtx4938/irq.c index 83f2750825a4..102e473c10a2 100644 --- a/arch/mips/tx4938/toshiba_rbtx4938/irq.c +++ b/arch/mips/tx4938/toshiba_rbtx4938/irq.c @@ -81,9 +81,9 @@ IRQ Device #include <asm/io.h> #include <asm/irq.h> #include <asm/processor.h> -#include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/time.h> +#include <asm/wbflush.h> #include <linux/bootmem.h> #include <asm/tx4938/rbtx4938.h> diff --git a/arch/mips/tx4938/toshiba_rbtx4938/spi_txx9.c b/arch/mips/tx4938/toshiba_rbtx4938/spi_txx9.c index fae3136f462d..b926e6a75c29 100644 --- a/arch/mips/tx4938/toshiba_rbtx4938/spi_txx9.c +++ b/arch/mips/tx4938/toshiba_rbtx4938/spi_txx9.c @@ -35,7 +35,8 @@ void __init txx9_spi_init(unsigned long base, int (*cs_func)(int chipid, int on) } static DECLARE_WAIT_QUEUE_HEAD(txx9_spi_wait); -static void txx9_spi_interrupt(int irq, void *dev_id, struct pt_regs *regs) + +static void txx9_spi_interrupt(int irq, void *dev_id) { /* disable rx intr */ tx4938_spiptr->cr0 &= ~TXx9_SPCR0_RBSIE; diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7a5c31d58378..c215c0d39fae 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -635,7 +635,7 @@ int vr41xx_set_intassign(unsigned int irq, unsigned char intassign) EXPORT_SYMBOL(vr41xx_set_intassign); -static int icu_get_irq(unsigned int irq, struct pt_regs *regs) +static int icu_get_irq(unsigned int irq) { uint16_t pend1, pend2; uint16_t mask1, mask2; diff --git a/arch/mips/vr41xx/common/irq.c b/arch/mips/vr41xx/common/irq.c index 4733c5344467..397ba94cd7ec 100644 --- a/arch/mips/vr41xx/common/irq.c +++ b/arch/mips/vr41xx/common/irq.c @@ -25,7 +25,7 @@ #include <asm/vr41xx/irq.h> typedef struct irq_cascade { - int (*get_irq)(unsigned int, struct pt_regs *); + int (*get_irq)(unsigned int); } irq_cascade_t; static irq_cascade_t irq_cascade[NR_IRQS] __cacheline_aligned; @@ -36,7 +36,7 @@ static struct irqaction cascade_irqaction = { .name = "cascade", }; -int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int, struct pt_regs *)) +int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int)) { int retval = 0; @@ -59,7 +59,7 @@ int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int, struct pt_regs *) EXPORT_SYMBOL_GPL(cascade_irq); -static void irq_dispatch(unsigned int irq, struct pt_regs *regs) +static void irq_dispatch(unsigned int irq) { irq_cascade_t *cascade; struct irq_desc *desc; @@ -74,39 +74,39 @@ static void irq_dispatch(unsigned int irq, struct pt_regs *regs) unsigned int source_irq = irq; desc = irq_desc + source_irq; desc->chip->ack(source_irq); - irq = cascade->get_irq(irq, regs); + irq = cascade->get_irq(irq); if (irq < 0) atomic_inc(&irq_err_count); else - irq_dispatch(irq, regs); + irq_dispatch(irq); desc->chip->end(source_irq); } else - do_IRQ(irq, regs); + do_IRQ(irq); } -asmlinkage void plat_irq_dispatch(struct pt_regs *regs) +asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; if (pending & CAUSEF_IP7) - do_IRQ(7, regs); + do_IRQ(7); else if (pending & 0x7800) { if (pending & CAUSEF_IP3) - irq_dispatch(3, regs); + irq_dispatch(3); else if (pending & CAUSEF_IP4) - irq_dispatch(4, regs); + irq_dispatch(4); else if (pending & CAUSEF_IP5) - irq_dispatch(5, regs); + irq_dispatch(5); else if (pending & CAUSEF_IP6) - irq_dispatch(6, regs); + irq_dispatch(6); } else if (pending & CAUSEF_IP2) - irq_dispatch(2, regs); + irq_dispatch(2); else if (pending & CAUSEF_IP0) - do_IRQ(0, regs); + do_IRQ(0); else if (pending & CAUSEF_IP1) - do_IRQ(1, regs); + do_IRQ(1); else - spurious_interrupt(regs); + spurious_interrupt(); } void __init arch_init_irq(void) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 2b257e4f17df..d6c486e9501c 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -424,7 +424,7 @@ struct parisc_device * create_tree_node(char id, struct device *parent) /* make the generic dma mask a pointer to the parisc one */ dev->dev.dma_mask = &dev->dma_mask; dev->dev.coherent_dma_mask = dev->dma_mask; - if (!device_register(&dev->dev)) { + if (device_register(&dev->dev)) { kfree(dev); return NULL; } @@ -853,9 +853,9 @@ static void print_parisc_device(struct parisc_device *dev) */ void init_parisc_bus(void) { - if (!bus_register(&parisc_bus_type)) + if (bus_register(&parisc_bus_type)) panic("Could not register PA-RISC bus type\n"); - if (!device_register(&root)) + if (device_register(&root)) panic("Could not register PA-RISC root device\n"); get_device(&root); } diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index c2531ae032cf..9158b707c0dd 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -160,13 +160,14 @@ void __init set_firmware_width(void) { #ifdef __LP64__ int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0); convert_to_wide(pdc_result); if(pdc_result[0] != NARROW_FIRMWARE) parisc_narrow_firmware = 0; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); #endif } @@ -196,10 +197,11 @@ void pdc_emergency_unlock(void) int pdc_add_valid(unsigned long address) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_ADD_VALID, PDC_ADD_VALID_VERIFY, address); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -216,15 +218,16 @@ EXPORT_SYMBOL(pdc_add_valid); int __init pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); memcpy(&pdc_result, chassis_info, sizeof(*chassis_info)); memcpy(&pdc_result2, led_info, len); retval = mem_pdc_call(PDC_CHASSIS, PDC_RETURN_CHASSIS_INFO, __pa(pdc_result), __pa(pdc_result2), len); memcpy(chassis_info, pdc_result, sizeof(*chassis_info)); memcpy(led_info, pdc_result2, len); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -239,13 +242,14 @@ int __init pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_inf int pdc_pat_chassis_send_log(unsigned long state, unsigned long data) { int retval = 0; + unsigned long flags; if (!is_pdc_pat()) return -1; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_CHASSIS_LOG, PDC_PAT_CHASSIS_WRITE_LOG, __pa(&state), __pa(&data)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -258,10 +262,11 @@ int pdc_pat_chassis_send_log(unsigned long state, unsigned long data) int pdc_chassis_disp(unsigned long disp) { int retval = 0; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_CHASSIS, PDC_CHASSIS_DISP, disp); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -273,11 +278,12 @@ int pdc_chassis_disp(unsigned long disp) int pdc_chassis_warn(unsigned long *warn) { int retval = 0; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_CHASSIS, PDC_CHASSIS_WARN, __pa(pdc_result)); *warn = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -292,15 +298,16 @@ int pdc_chassis_warn(unsigned long *warn) int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result)); convert_to_wide(pdc_result); pdc_coproc_info->ccr_functional = pdc_result[0]; pdc_coproc_info->ccr_present = pdc_result[1]; pdc_coproc_info->revision = pdc_result[17]; pdc_coproc_info->model = pdc_result[18]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -320,14 +327,15 @@ int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index, void *iodc_data, unsigned int iodc_data_size) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_IODC, PDC_IODC_READ, __pa(pdc_result), hpa, index, __pa(pdc_result2), iodc_data_size); convert_to_wide(pdc_result); *actcnt = pdc_result[0]; memcpy(iodc_data, pdc_result2, iodc_data_size); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -346,14 +354,15 @@ int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info, struct pdc_module_path *mod_path, long mod_index) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_SYSTEM_MAP, PDC_FIND_MODULE, __pa(pdc_result), __pa(pdc_result2), mod_index); convert_to_wide(pdc_result); memcpy(pdc_mod_info, pdc_result, sizeof(*pdc_mod_info)); memcpy(mod_path, pdc_result2, sizeof(*mod_path)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); pdc_mod_info->mod_addr = f_extend(pdc_mod_info->mod_addr); return retval; @@ -372,13 +381,14 @@ int pdc_system_map_find_addrs(struct pdc_system_map_addr_info *pdc_addr_info, long mod_index, long addr_index) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_SYSTEM_MAP, PDC_FIND_ADDRESS, __pa(pdc_result), mod_index, addr_index); convert_to_wide(pdc_result); memcpy(pdc_addr_info, pdc_result, sizeof(*pdc_addr_info)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); pdc_addr_info->mod_addr = f_extend(pdc_addr_info->mod_addr); return retval; @@ -393,12 +403,13 @@ int pdc_system_map_find_addrs(struct pdc_system_map_addr_info *pdc_addr_info, int pdc_model_info(struct pdc_model *model) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_INFO, __pa(pdc_result), 0); convert_to_wide(pdc_result); memcpy(model, pdc_result, sizeof(*model)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -414,8 +425,9 @@ int pdc_model_info(struct pdc_model *model) int pdc_model_sysmodel(char *name) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_SYSMODEL, __pa(pdc_result), OS_ID_HPUX, __pa(name)); convert_to_wide(pdc_result); @@ -425,7 +437,7 @@ int pdc_model_sysmodel(char *name) } else { name[0] = 0; } - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -443,12 +455,13 @@ int pdc_model_sysmodel(char *name) int pdc_model_versions(unsigned long *versions, int id) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_VERSIONS, __pa(pdc_result), id); convert_to_wide(pdc_result); *versions = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -463,13 +476,14 @@ int pdc_model_versions(unsigned long *versions, int id) int pdc_model_cpuid(unsigned long *cpu_id) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); pdc_result[0] = 0; /* preset zero (call may not be implemented!) */ retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CPU_ID, __pa(pdc_result), 0); convert_to_wide(pdc_result); *cpu_id = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -484,13 +498,14 @@ int pdc_model_cpuid(unsigned long *cpu_id) int pdc_model_capabilities(unsigned long *capabilities) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); pdc_result[0] = 0; /* preset zero (call may not be implemented!) */ retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0); convert_to_wide(pdc_result); *capabilities = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -504,12 +519,13 @@ int pdc_model_capabilities(unsigned long *capabilities) int pdc_cache_info(struct pdc_cache_info *cache_info) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_CACHE, PDC_CACHE_INFO, __pa(pdc_result), 0); convert_to_wide(pdc_result); memcpy(cache_info, pdc_result, sizeof(*cache_info)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -523,13 +539,14 @@ int pdc_cache_info(struct pdc_cache_info *cache_info) int pdc_spaceid_bits(unsigned long *space_bits) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); pdc_result[0] = 0; retval = mem_pdc_call(PDC_CACHE, PDC_CACHE_RET_SPID, __pa(pdc_result), 0); convert_to_wide(pdc_result); *space_bits = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -544,11 +561,12 @@ int pdc_spaceid_bits(unsigned long *space_bits) int pdc_btlb_info(struct pdc_btlb_info *btlb) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_INFO, __pa(pdc_result), 0); memcpy(btlb, pdc_result, sizeof(*btlb)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); if(retval < 0) { btlb->max_size = 0; @@ -572,13 +590,14 @@ int pdc_mem_map_hpa(struct pdc_memory_map *address, struct pdc_module_path *mod_path) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); memcpy(pdc_result2, mod_path, sizeof(*mod_path)); retval = mem_pdc_call(PDC_MEM_MAP, PDC_MEM_MAP_HPA, __pa(pdc_result), __pa(pdc_result2)); memcpy(address, pdc_result, sizeof(*address)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -594,8 +613,9 @@ int pdc_mem_map_hpa(struct pdc_memory_map *address, int pdc_lan_station_id(char *lan_addr, unsigned long hpa) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_LAN_STATION_ID, PDC_LAN_STATION_ID_READ, __pa(pdc_result), hpa); if (retval < 0) { @@ -604,7 +624,7 @@ int pdc_lan_station_id(char *lan_addr, unsigned long hpa) } else { memcpy(lan_addr, pdc_result, PDC_LAN_STATION_ID_SIZE); } - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -623,13 +643,14 @@ EXPORT_SYMBOL(pdc_lan_station_id); int pdc_stable_read(unsigned long staddr, void *memaddr, unsigned long count) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_STABLE, PDC_STABLE_READ, staddr, __pa(pdc_result), count); convert_to_wide(pdc_result); memcpy(memaddr, pdc_result, count); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -648,13 +669,14 @@ EXPORT_SYMBOL(pdc_stable_read); int pdc_stable_write(unsigned long staddr, void *memaddr, unsigned long count) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); memcpy(pdc_result, memaddr, count); convert_to_wide(pdc_result); retval = mem_pdc_call(PDC_STABLE, PDC_STABLE_WRITE, staddr, __pa(pdc_result), count); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -672,11 +694,12 @@ EXPORT_SYMBOL(pdc_stable_write); int pdc_stable_get_size(unsigned long *size) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_STABLE, PDC_STABLE_RETURN_SIZE, __pa(pdc_result)); *size = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -691,10 +714,11 @@ EXPORT_SYMBOL(pdc_stable_get_size); int pdc_stable_verify_contents(void) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_STABLE, PDC_STABLE_VERIFY_CONTENTS); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -709,10 +733,11 @@ EXPORT_SYMBOL(pdc_stable_verify_contents); int pdc_stable_initialize(void) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_STABLE, PDC_STABLE_INITIALIZE); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -735,8 +760,9 @@ EXPORT_SYMBOL(pdc_stable_initialize); int pdc_get_initiator(struct hardware_path *hwpath, struct pdc_initiator *initiator) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); /* BCJ-XXXX series boxes. E.G. "9000/785/C3000" */ #define IS_SPROCKETS() (strlen(boot_cpu_data.pdc.sys_model_name) == 14 && \ @@ -776,7 +802,8 @@ int pdc_get_initiator(struct hardware_path *hwpath, struct pdc_initiator *initia } out: - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); + return (retval >= PDC_OK); } EXPORT_SYMBOL(pdc_get_initiator); @@ -794,13 +821,14 @@ EXPORT_SYMBOL(pdc_get_initiator); int pdc_pci_irt_size(unsigned long *num_entries, unsigned long hpa) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PCI_INDEX, PDC_PCI_GET_INT_TBL_SIZE, __pa(pdc_result), hpa); convert_to_wide(pdc_result); *num_entries = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -817,14 +845,15 @@ int pdc_pci_irt_size(unsigned long *num_entries, unsigned long hpa) int pdc_pci_irt(unsigned long num_entries, unsigned long hpa, void *tbl) { int retval; + unsigned long flags; BUG_ON((unsigned long)tbl & 0x7); - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); pdc_result[0] = num_entries; retval = mem_pdc_call(PDC_PCI_INDEX, PDC_PCI_GET_INT_TBL, __pa(pdc_result), hpa, __pa(tbl)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -842,12 +871,15 @@ int pdc_pci_irt(unsigned long num_entries, unsigned long hpa, void *tbl) unsigned int pdc_pci_config_read(void *hpa, unsigned long cfg_addr) { int retval; - spin_lock_irq(&pdc_lock); + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); pdc_result[0] = 0; pdc_result[1] = 0; retval = mem_pdc_call(PDC_PCI_INDEX, PDC_PCI_READ_CONFIG, __pa(pdc_result), hpa, cfg_addr&~3UL, 4UL); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); + return retval ? ~0 : (unsigned int) pdc_result[0]; } @@ -863,12 +895,15 @@ unsigned int pdc_pci_config_read(void *hpa, unsigned long cfg_addr) void pdc_pci_config_write(void *hpa, unsigned long cfg_addr, unsigned int val) { int retval; - spin_lock_irq(&pdc_lock); + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); pdc_result[0] = 0; retval = mem_pdc_call(PDC_PCI_INDEX, PDC_PCI_WRITE_CONFIG, __pa(pdc_result), hpa, cfg_addr&~3UL, 4UL, (unsigned long) val); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); + return retval; } #endif /* UNTESTED CODE */ @@ -882,12 +917,13 @@ void pdc_pci_config_write(void *hpa, unsigned long cfg_addr, unsigned int val) int pdc_tod_read(struct pdc_tod *tod) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_TOD, PDC_TOD_READ, __pa(pdc_result), 0); convert_to_wide(pdc_result); memcpy(tod, pdc_result, sizeof(*tod)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -903,10 +939,11 @@ EXPORT_SYMBOL(pdc_tod_read); int pdc_tod_set(unsigned long sec, unsigned long usec) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_TOD, PDC_TOD_WRITE, sec, usec); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -917,13 +954,14 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr, struct pdc_memory_table *tbl, unsigned long entries) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_MEM, PDC_MEM_TABLE, __pa(pdc_result), __pa(pdc_result2), entries); convert_to_wide(pdc_result); memcpy(r_addr, pdc_result, sizeof(*r_addr)); memcpy(tbl, pdc_result2, entries * sizeof(*tbl)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -936,11 +974,12 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr, int pdc_do_firm_test_reset(unsigned long ftc_bitmap) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_BROADCAST_RESET, PDC_DO_FIRM_TEST_RESET, PDC_FIRM_TEST_MAGIC, ftc_bitmap); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -953,10 +992,11 @@ int pdc_do_firm_test_reset(unsigned long ftc_bitmap) int pdc_do_reset(void) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_BROADCAST_RESET, PDC_DO_RESET); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -970,16 +1010,17 @@ int pdc_do_reset(void) int __init pdc_soft_power_info(unsigned long *power_reg) { int retval; + unsigned long flags; *power_reg = (unsigned long) (-1); - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_SOFT_POWER, PDC_SOFT_POWER_INFO, __pa(pdc_result), 0); if (retval == PDC_OK) { convert_to_wide(pdc_result); *power_reg = f_extend(pdc_result[0]); } - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -998,9 +1039,12 @@ int __init pdc_soft_power_info(unsigned long *power_reg) int pdc_soft_power_button(int sw_control) { int retval; - spin_lock_irq(&pdc_lock); + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_SOFT_POWER, PDC_SOFT_POWER_ENABLE, __pa(pdc_result), sw_control); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); + return retval; } @@ -1011,9 +1055,11 @@ int pdc_soft_power_button(int sw_control) */ void pdc_io_reset(void) { - spin_lock_irq(&pdc_lock); + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); mem_pdc_call(PDC_IO, PDC_IO_RESET, 0); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); } /* @@ -1027,9 +1073,11 @@ void pdc_io_reset(void) */ void pdc_io_reset_devices(void) { - spin_lock_irq(&pdc_lock); + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); mem_pdc_call(PDC_IO, PDC_IO_RESET_DEVICES, 0); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); } @@ -1146,10 +1194,11 @@ int pdc_sti_call(unsigned long func, unsigned long flags, unsigned long glob_cfg) { int retval; + unsigned long irqflags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, irqflags); retval = real32_call(func, flags, inptr, outputr, glob_cfg); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, irqflags); return retval; } @@ -1166,11 +1215,12 @@ EXPORT_SYMBOL(pdc_sti_call); int pdc_pat_cell_get_number(struct pdc_pat_cell_num *cell_info) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_CELL, PDC_PAT_CELL_GET_NUMBER, __pa(pdc_result)); memcpy(cell_info, pdc_result, sizeof(*cell_info)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -1190,16 +1240,17 @@ int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long unsigned long view_type, void *mem_addr) { int retval; + unsigned long flags; static struct pdc_pat_cell_mod_maddr_block result __attribute__ ((aligned (8))); - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_CELL, PDC_PAT_CELL_MODULE, __pa(pdc_result), ploc, mod, view_type, __pa(&result)); if(!retval) { *actcnt = pdc_result[0]; memcpy(mem_addr, &result, *actcnt); } - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -1214,12 +1265,13 @@ int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, void *hpa) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_CPU, PDC_PAT_CPU_GET_NUMBER, __pa(&pdc_result), hpa); memcpy(cpu_info, pdc_result, sizeof(*cpu_info)); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -1235,12 +1287,13 @@ int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, void *hpa) int pdc_pat_get_irt_size(unsigned long *num_entries, unsigned long cell_num) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_IO, PDC_PAT_IO_GET_PCI_ROUTING_TABLE_SIZE, __pa(pdc_result), cell_num); *num_entries = pdc_result[0]; - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -1255,11 +1308,12 @@ int pdc_pat_get_irt_size(unsigned long *num_entries, unsigned long cell_num) int pdc_pat_get_irt(void *r_addr, unsigned long cell_num) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_IO, PDC_PAT_IO_GET_PCI_ROUTING_TABLE, __pa(r_addr), cell_num); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -1276,13 +1330,14 @@ int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_PD, PDC_PAT_PD_GET_ADDR_MAP, __pa(pdc_result), __pa(pdc_result2), count, offset); *actual_len = pdc_result[0]; memcpy(mem_addr, pdc_result2, *actual_len); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -1297,7 +1352,9 @@ int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *mem_addr) { int retval; - spin_lock_irq(&pdc_lock); + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_IO, PDC_PAT_IO_PCI_CONFIG_READ, __pa(pdc_result), pci_addr, pci_size); switch(pci_size) { @@ -1305,7 +1362,7 @@ int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *mem_addr) case 2: *(u16 *)mem_addr = (u16) pdc_result[0]; case 4: *(u32 *)mem_addr = (u32) pdc_result[0]; } - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } @@ -1321,11 +1378,12 @@ int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *mem_addr) int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val) { int retval; + unsigned long flags; - spin_lock_irq(&pdc_lock); + spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_IO, PDC_PAT_IO_PCI_CONFIG_WRITE, pci_addr, pci_size, val); - spin_unlock_irq(&pdc_lock); + spin_unlock_irqrestore(&pdc_lock, flags); return retval; } diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 9bdd0197ceb7..b39c5b9aff46 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -35,8 +35,8 @@ #undef PARISC_IRQ_CR16_COUNTS -extern irqreturn_t timer_interrupt(int, void *, struct pt_regs *); -extern irqreturn_t ipi_interrupt(int, void *, struct pt_regs *); +extern irqreturn_t timer_interrupt(int, void *); +extern irqreturn_t ipi_interrupt(int, void *); #define EIEM_MASK(irq) (1UL<<(CPU_IRQ_MAX - irq)) @@ -347,12 +347,14 @@ static inline int eirr_to_irq(unsigned long eirr) /* ONLY called from entry.S:intr_extint() */ void do_cpu_irq_mask(struct pt_regs *regs) { + struct pt_regs *old_regs; unsigned long eirr_val; int irq, cpu = smp_processor_id(); #ifdef CONFIG_SMP cpumask_t dest; #endif + old_regs = set_irq_regs(regs); local_irq_disable(); irq_enter(); @@ -375,10 +377,11 @@ void do_cpu_irq_mask(struct pt_regs *regs) goto set_out; } #endif - __do_IRQ(irq, regs); + __do_IRQ(irq); out: irq_exit(); + set_irq_regs(old_regs); return; set_out: diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 6d57553d8ef8..8f6a0b312f7a 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -69,10 +69,6 @@ EXPORT_SYMBOL(memcpy_toio); EXPORT_SYMBOL(memcpy_fromio); EXPORT_SYMBOL(memset_io); -#include <asm/unistd.h> -EXPORT_SYMBOL(sys_lseek); -EXPORT_SYMBOL(sys_write); - #include <asm/semaphore.h> EXPORT_SYMBOL(__up); EXPORT_SYMBOL(__down_interruptible); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index faad338f310e..4a23a97b06cd 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -154,7 +154,7 @@ halt_processor(void) irqreturn_t -ipi_interrupt(int irq, void *dev_id, struct pt_regs *regs) +ipi_interrupt(int irq, void *dev_id) { int this_cpu = smp_processor_id(); struct cpuinfo_parisc *p = &cpu_data[this_cpu]; @@ -414,19 +414,6 @@ smp_flush_tlb_all(void) on_each_cpu(flush_tlb_all_local, NULL, 1, 1); } - -void -smp_do_timer(struct pt_regs *regs) -{ - int cpu = smp_processor_id(); - struct cpuinfo_parisc *data = &cpu_data[cpu]; - - if (!--data->prof_counter) { - data->prof_counter = data->prof_multiplier; - update_process_times(user_mode(regs)); - } -} - /* * Called by secondaries to update state and initialize CPU registers. */ diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 1d58ce0e37ad..bad7d1eb62b9 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -34,10 +34,6 @@ static unsigned long clocktick __read_mostly; /* timer cycles per tick */ -#ifdef CONFIG_SMP -extern void smp_do_timer(struct pt_regs *regs); -#endif - /* * We keep time on PA-RISC Linux by using the Interval Timer which is * a pair of registers; one is read-only and one is write-only; both @@ -55,21 +51,22 @@ extern void smp_do_timer(struct pt_regs *regs); * held off for an arbitrarily long period of time by interrupts being * disabled, so we may miss one or more ticks. */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t timer_interrupt(int irq, void *dev_id) { unsigned long now; unsigned long next_tick; unsigned long cycles_elapsed, ticks_elapsed; unsigned long cycles_remainder; unsigned int cpu = smp_processor_id(); + struct cpuinfo_parisc *cpuinfo = &cpu_data[cpu]; /* gcc can optimize for "read-only" case with a local clocktick */ unsigned long cpt = clocktick; - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); /* Initialize next_tick to the expected tick time. */ - next_tick = cpu_data[cpu].it_value; + next_tick = cpuinfo->it_value; /* Get current interval timer. * CR16 reads as 64 bits in CPU wide mode. @@ -120,7 +117,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ next_tick = now + cycles_remainder; - cpu_data[cpu].it_value = next_tick; + cpuinfo->it_value = next_tick; /* Skip one clocktick on purpose if we are likely to miss next_tick. * We want to avoid the new next_tick being less than CR16. @@ -131,18 +128,19 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) next_tick += cpt; /* Program the IT when to deliver the next interrupt. */ - /* Only bottom 32-bits of next_tick are written to cr16. */ + /* Only bottom 32-bits of next_tick are written to cr16. */ mtctl(next_tick, 16); /* Done mucking with unreliable delivery of interrupts. * Go do system house keeping. */ -#ifdef CONFIG_SMP - smp_do_timer(regs); -#else - update_process_times(user_mode(regs)); -#endif + + if (!--cpuinfo->prof_counter) { + cpuinfo->prof_counter = cpuinfo->prof_multiplier; + update_process_times(user_mode(get_irq_regs())); + } + if (cpu == 0) { write_seqlock(&xtime_lock); do_timer(ticks_elapsed); @@ -319,13 +317,15 @@ void __init time_init(void) start_cpu_itimer(); /* get CPU 0 started */ - if(pdc_tod_read(&tod_data) == 0) { - write_seqlock_irq(&xtime_lock); + if (pdc_tod_read(&tod_data) == 0) { + unsigned long flags; + + write_seqlock_irqsave(&xtime_lock, flags); xtime.tv_sec = tod_data.tod_sec; xtime.tv_nsec = tod_data.tod_usec * 1000; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - write_sequnlock_irq(&xtime_lock); + write_sequnlock_irqrestore(&xtime_lock, flags); } else { printk(KERN_ERR "Error reading tod clock\n"); xtime.tv_sec = 0; diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 003520b56303..37ddfcab0003 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -105,10 +105,10 @@ wrapperbits := $(extra-y) $(addprefix $(obj)/,addnote hack-coff) # Bits for building various flavours of zImage ifneq ($(CROSS32_COMPILE),) -CROSSWRAP := -C $(CROSS32_COMPILE) +CROSSWRAP := -C "$(CROSS32_COMPILE)" else ifneq ($(CROSS_COMPILE),) -CROSSWRAP := -C $(CROSS_COMPILE) +CROSSWRAP := -C "$(CROSS_COMPILE)" endif endif @@ -151,12 +151,12 @@ $(obj)/zImage.initrd.miboot: vmlinux $(wrapperbits) $(obj)/uImage: vmlinux $(wrapperbits) $(call cmd,wrap,uboot) -image-$(CONFIG_PPC_PSERIES) += zImage.pseries -image-$(CONFIG_PPC_MAPLE) += zImage.pseries -image-$(CONFIG_PPC_CELL) += zImage.pseries -image-$(CONFIG_PPC_CHRP) += zImage.chrp -image-$(CONFIG_PPC_PMAC) += zImage.pmac -image-$(CONFIG_DEFAULT_UIMAGE) += uImage +image-$(CONFIG_PPC_PSERIES) += zImage.pseries +image-$(CONFIG_PPC_MAPLE) += zImage.pseries +image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries +image-$(CONFIG_PPC_CHRP) += zImage.chrp +image-$(CONFIG_PPC_PMAC) += zImage.pmac +image-$(CONFIG_DEFAULT_UIMAGE) += uImage # For 32-bit powermacs, build the COFF and miboot images # as well as the ELF images. diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts new file mode 100644 index 000000000000..27807fc45888 --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8349emitx.dts @@ -0,0 +1,246 @@ +/* + * MPC8349E-mITX Device Tree Source + * + * Copyright 2006 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +/ { + model = "MPC8349EMITX"; + compatible = "MPC834xMITX"; + #address-cells = <1>; + #size-cells = <1>; + + cpus { + #cpus = <1>; + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,8349@0 { + device_type = "cpu"; + reg = <0>; + d-cache-line-size = <20>; + i-cache-line-size = <20>; + d-cache-size = <8000>; + i-cache-size = <8000>; + timebase-frequency = <0>; // from bootloader + bus-frequency = <0>; // from bootloader + clock-frequency = <0>; // from bootloader + 32-bit; + }; + }; + + memory { + device_type = "memory"; + reg = <00000000 10000000>; + }; + + soc8349@e0000000 { + #address-cells = <1>; + #size-cells = <1>; + #interrupt-cells = <2>; + device_type = "soc"; + ranges = <0 e0000000 00100000>; + reg = <e0000000 00000200>; + bus-frequency = <0>; // from bootloader + + wdt@200 { + device_type = "watchdog"; + compatible = "mpc83xx_wdt"; + reg = <200 100>; + }; + + i2c@3000 { + device_type = "i2c"; + compatible = "fsl-i2c"; + reg = <3000 100>; + interrupts = <e 8>; + interrupt-parent = <700>; + dfsrr; + }; + + i2c@3100 { + device_type = "i2c"; + compatible = "fsl-i2c"; + reg = <3100 100>; + interrupts = <f 8>; + interrupt-parent = <700>; + dfsrr; + }; + + spi@7000 { + device_type = "spi"; + compatible = "mpc83xx_spi"; + reg = <7000 1000>; + interrupts = <10 8>; + interrupt-parent = <700>; + mode = <0>; + }; + + usb@22000 { + device_type = "usb"; + compatible = "fsl-usb2-mph"; + reg = <22000 1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <700>; + interrupts = <27 2>; + phy_type = "ulpi"; + port1; + }; + + usb@23000 { + device_type = "usb"; + compatible = "fsl-usb2-dr"; + reg = <23000 1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <700>; + interrupts = <26 2>; + phy_type = "ulpi"; + }; + + mdio@24520 { + device_type = "mdio"; + compatible = "gianfar"; + reg = <24520 20>; + #address-cells = <1>; + #size-cells = <0>; + linux,phandle = <24520>; + + /* Vitesse 8201 */ + ethernet-phy@1c { + linux,phandle = <245201c>; + interrupt-parent = <700>; + interrupts = <12 2>; + reg = <1c>; + device_type = "ethernet-phy"; + }; + + /* Vitesse 7385 */ + ethernet-phy@1f { + linux,phandle = <245201f>; + interrupt-parent = <700>; + interrupts = <12 2>; + reg = <1f>; + device_type = "ethernet-phy"; + }; + }; + + ethernet@24000 { + device_type = "network"; + model = "TSEC"; + compatible = "gianfar"; + reg = <24000 1000>; + address = [ 00 00 00 00 00 00 ]; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <20 8 21 8 22 8>; + interrupt-parent = <700>; + phy-handle = <245201c>; + }; + + ethernet@25000 { + #address-cells = <1>; + #size-cells = <0>; + device_type = "network"; + model = "TSEC"; + compatible = "gianfar"; + reg = <25000 1000>; + address = [ 00 00 00 00 00 00 ]; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <23 8 24 8 25 8>; + interrupt-parent = <700>; + phy-handle = <245201f>; + }; + + serial@4500 { + device_type = "serial"; + compatible = "ns16550"; + reg = <4500 100>; + clock-frequency = <0>; // from bootloader + interrupts = <9 8>; + interrupt-parent = <700>; + }; + + serial@4600 { + device_type = "serial"; + compatible = "ns16550"; + reg = <4600 100>; + clock-frequency = <0>; // from bootloader + interrupts = <a 8>; + interrupt-parent = <700>; + }; + + pci@8500 { + interrupt-map-mask = <f800 0 0 7>; + interrupt-map = < + /* IDSEL 0x10 - SATA */ + 8000 0 0 1 700 16 8 /* SATA_INTA */ + >; + interrupt-parent = <700>; + interrupts = <42 8>; + bus-range = <0 0>; + ranges = <42000000 0 80000000 80000000 0 10000000 + 02000000 0 90000000 90000000 0 10000000 + 01000000 0 00000000 e2000000 0 01000000>; + clock-frequency = <3f940aa>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <8500 100>; + compatible = "83xx"; + device_type = "pci"; + }; + + pci@8600 { + interrupt-map-mask = <f800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0E - MiniPCI Slot */ + 7000 0 0 1 700 15 8 /* PCI_INTA */ + + /* IDSEL 0x0F - PCI Slot */ + 7800 0 0 1 700 14 8 /* PCI_INTA */ + 7800 0 0 2 700 15 8 /* PCI_INTB */ + >; + interrupt-parent = <700>; + interrupts = <43 8>; + bus-range = <1 1>; + ranges = <42000000 0 a0000000 a0000000 0 10000000 + 02000000 0 b0000000 b0000000 0 10000000 + 01000000 0 00000000 e3000000 0 01000000>; + clock-frequency = <3f940aa>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <8600 100>; + compatible = "83xx"; + device_type = "pci"; + }; + + crypto@30000 { + device_type = "crypto"; + model = "SEC2"; + compatible = "talitos"; + reg = <30000 10000>; + interrupts = <b 8>; + interrupt-parent = <700>; + num-channels = <4>; + channel-fifo-len = <18>; + exec-units-mask = <0000007e>; + descriptor-types-mask = <01010ebf>; + }; + + pic@700 { + linux,phandle = <700>; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <700 100>; + built-in; + device_type = "ipic"; + }; + }; +}; diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c index fd99f789a37b..3a71845afc6c 100644 --- a/arch/powerpc/boot/of.c +++ b/arch/powerpc/boot/of.c @@ -176,12 +176,9 @@ static void *claim(unsigned long virt, unsigned long size, unsigned long align) static void *of_try_claim(u32 size) { unsigned long addr = 0; - static u8 first_time = 1; - if (first_time) { + if (claim_base == 0) claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB); - first_time = 0; - } for(; claim_base < RAM_END; claim_base += ONE_MB) { #ifdef DEBUG diff --git a/arch/powerpc/configs/iseries_defconfig b/arch/powerpc/configs/iseries_defconfig index d58f82f836f8..b5005506c2f8 100644 --- a/arch/powerpc/configs/iseries_defconfig +++ b/arch/powerpc/configs/iseries_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-rc6 -# Sun Sep 10 10:22:57 2006 +# Linux kernel version: 2.6.19-rc1 +# Fri Oct 6 13:25:04 2006 # CONFIG_PPC64=y CONFIG_64BIT=y @@ -22,6 +22,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_PPC_OF=y # CONFIG_PPC_UDBG_16550 is not set # CONFIG_GENERIC_TBSYNC is not set +CONFIG_AUDIT_ARCH=y # CONFIG_DEFAULT_UIMAGE is not set # @@ -52,10 +53,11 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -CONFIG_SYSCTL=y +# CONFIG_UTS_NS is not set CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IKCONFIG=y @@ -64,7 +66,9 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set +# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -73,12 +77,12 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y -CONFIG_RT_MUTEXES=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -97,6 +101,7 @@ CONFIG_STOP_MACHINE=y # # Block layer # +CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set # @@ -115,13 +120,18 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -# CONFIG_PPC_MULTIPLATFORM is not set -CONFIG_PPC_ISERIES=y +CONFIG_PPC_MULTIPLATFORM=y # CONFIG_EMBEDDED6xx is not set # CONFIG_APUS is not set +# CONFIG_PPC_PSERIES is not set +CONFIG_PPC_ISERIES=y +# CONFIG_PPC_PMAC is not set +# CONFIG_PPC_MAPLE is not set +# CONFIG_PPC_PASEMI is not set # CONFIG_PPC_CELL is not set # CONFIG_PPC_CELL_NATIVE is not set -# CONFIG_UDBG_RTAS_CONSOLE is not set +# CONFIG_PPC_IBM_CELL_BLADE is not set +# CONFIG_U3_DART is not set # CONFIG_PPC_RTAS is not set # CONFIG_MMIO_NVRAM is not set CONFIG_IBMVIO=y @@ -147,12 +157,15 @@ CONFIG_BINFMT_ELF=y CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_IOMMU_VMERGE=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set CONFIG_IRQ_ALL_CPUS=y CONFIG_LPARCFG=y # CONFIG_NUMA is not set CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -179,6 +192,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y # CONFIG_PCIEPORTBUS is not set +# CONFIG_PCI_MULTITHREAD_PROBE is not set # CONFIG_PCI_DEBUG is not set # @@ -206,6 +220,7 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m +CONFIG_XFRM_SUB_POLICY=y CONFIG_NET_KEY=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -224,10 +239,12 @@ CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=y CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" # # IP: Virtual Server Configuration @@ -247,6 +264,7 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_NOTRACK=m @@ -255,6 +273,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m # CONFIG_NETFILTER_XT_MATCH_DCCP is not set +CONFIG_NETFILTER_XT_MATCH_DSCP=m # CONFIG_NETFILTER_XT_MATCH_ESP is not set CONFIG_NETFILTER_XT_MATCH_HELPER=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m @@ -294,7 +313,6 @@ CONFIG_IP_NF_MATCH_IPRANGE=m CONFIG_IP_NF_MATCH_TOS=m CONFIG_IP_NF_MATCH_RECENT=m CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m # CONFIG_IP_NF_MATCH_AH is not set CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_MATCH_OWNER=m @@ -319,7 +337,6 @@ CONFIG_IP_NF_NAT_AMANDA=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m @@ -351,7 +368,6 @@ CONFIG_LLC=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -433,6 +449,7 @@ CONFIG_BLK_DEV_INITRD=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -454,12 +471,14 @@ CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set # -# SCSI Transport Attributes +# SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set -# CONFIG_SCSI_SAS_ATTRS is not set +CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SAS_LIBSAS_DEBUG=y # # SCSI low-level drivers @@ -472,10 +491,11 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_AIC7XXX is not set # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_ARCMSR is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set -# CONFIG_ATA is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set @@ -486,16 +506,22 @@ CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_IBMVSCSI=m # CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set # CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set # CONFIG_SCSI_QLOGIC_1280 is not set # CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set # CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +# CONFIG_ATA is not set + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -508,6 +534,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m @@ -573,6 +600,7 @@ CONFIG_MII=y # CONFIG_HP100 is not set CONFIG_NET_PCI=y CONFIG_PCNET32=y +CONFIG_PCNET32_NAPI=y # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set @@ -610,6 +638,7 @@ CONFIG_E1000=m # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set +# CONFIG_QLA3XXX is not set # # Ethernet (10000 Mbit) @@ -649,6 +678,7 @@ CONFIG_PPP_BSDCOMP=m # CONFIG_PPP_MPPE is not set CONFIG_PPPOE=m # CONFIG_SLIP is not set +CONFIG_SLHC=m # CONFIG_NET_FC is not set # CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y @@ -671,6 +701,7 @@ CONFIG_NET_POLL_CONTROLLER=y # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set # # Userland interfaces @@ -774,12 +805,12 @@ CONFIG_MAX_RAW_DEVS=256 # # Misc devices # +# CONFIG_TIFM_CORE is not set # # Multimedia devices # # CONFIG_VIDEO_DEV is not set -CONFIG_VIDEO_V4L2=y # # Digital Video Broadcasting Devices @@ -893,6 +924,9 @@ CONFIG_XFS_FS=m CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_NOLOCK=m +CONFIG_GFS2_FS_LOCKING_DLM=m # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -929,12 +963,14 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y # CONFIG_HUGETLBFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y -# CONFIG_CONFIGFS_FS is not set +CONFIG_CONFIGFS_FS=m # # Miscellaneous filesystems @@ -988,6 +1024,7 @@ CONFIG_CIFS_POSIX=y # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set # CONFIG_9P_FS is not set +CONFIG_GENERIC_ACL=y # # Partition Types @@ -1040,6 +1077,12 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_UTF8 is not set # +# Distributed Lock Manager +# +CONFIG_DLM=m +# CONFIG_DLM_DEBUG is not set + +# # iSeries device drivers # CONFIG_VIOCONS=y @@ -1073,6 +1116,7 @@ CONFIG_PLIST=y # Kernel hacking # # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_MUST_CHECK=y CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y @@ -1091,6 +1135,7 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_LIST is not set # CONFIG_FORCED_INLINING is not set # CONFIG_RCU_TORTURE_TEST is not set CONFIG_DEBUG_STACKOVERFLOW=y @@ -1109,6 +1154,10 @@ CONFIG_IRQSTACKS=y # Cryptographic options # CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_MD4=m @@ -1118,9 +1167,12 @@ CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_CAST5=m diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 62ba66091a13..ae96a5b2f00d 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-rc6 -# Sun Sep 10 10:24:55 2006 +# Linux kernel version: 2.6.18 +# Mon Oct 9 11:59:34 2006 # CONFIG_PPC64=y CONFIG_64BIT=y @@ -22,6 +22,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_PPC_OF=y CONFIG_PPC_UDBG_16550=y CONFIG_GENERIC_TBSYNC=y +CONFIG_AUDIT_ARCH=y # CONFIG_DEFAULT_UIMAGE is not set # @@ -34,7 +35,7 @@ CONFIG_PPC_FPU=y CONFIG_PPC_STD_MMU=y CONFIG_VIRT_CPU_ACCOUNTING=y CONFIG_SMP=y -CONFIG_NR_CPUS=2 +CONFIG_NR_CPUS=4 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -51,10 +52,11 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -CONFIG_SYSCTL=y +# CONFIG_UTS_NS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -62,7 +64,9 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set +# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -71,12 +75,12 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y -CONFIG_RT_MUTEXES=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -95,6 +99,7 @@ CONFIG_STOP_MACHINE=y # # Block layer # +CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set # @@ -114,16 +119,16 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # Platform support # CONFIG_PPC_MULTIPLATFORM=y -# CONFIG_PPC_ISERIES is not set # CONFIG_EMBEDDED6xx is not set # CONFIG_APUS is not set # CONFIG_PPC_PSERIES is not set +# CONFIG_PPC_ISERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_MAPLE=y +# CONFIG_PPC_PASEMI is not set # CONFIG_PPC_CELL is not set # CONFIG_PPC_CELL_NATIVE is not set # CONFIG_PPC_IBM_CELL_BLADE is not set -# CONFIG_UDBG_RTAS_CONSOLE is not set CONFIG_U3_DART=y # CONFIG_PPC_RTAS is not set # CONFIG_MMIO_NVRAM is not set @@ -157,6 +162,7 @@ CONFIG_IRQ_ALL_CPUS=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -184,6 +190,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y # CONFIG_PCIEPORTBUS is not set +# CONFIG_PCI_MULTITHREAD_PROBE is not set # CONFIG_PCI_DEBUG is not set # @@ -211,6 +218,7 @@ CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m +# CONFIG_XFRM_SUB_POLICY is not set # CONFIG_NET_KEY is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -232,10 +240,12 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_TUNNEL is not set CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_IPV6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set @@ -265,7 +275,6 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -377,6 +386,7 @@ CONFIG_BLK_DEV_AMD74XX=y # CONFIG_BLK_DEV_CS5530 is not set # CONFIG_BLK_DEV_HPT34X is not set # CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set # CONFIG_BLK_DEV_SC1200 is not set # CONFIG_BLK_DEV_PIIX is not set # CONFIG_BLK_DEV_IT821X is not set @@ -399,6 +409,12 @@ CONFIG_IDEDMA_AUTO=y # # CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set +# CONFIG_SCSI_NETLINK is not set + +# +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +# CONFIG_ATA is not set # # Multi-device support (RAID and LVM) @@ -498,7 +514,7 @@ CONFIG_E1000=y # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y # CONFIG_BNX2 is not set -# CONFIG_MV643XX_ETH is not set +# CONFIG_QLA3XXX is not set # # Ethernet (10000 Mbit) @@ -545,6 +561,7 @@ CONFIG_TIGON3=y # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set # # Userland interfaces @@ -704,6 +721,7 @@ CONFIG_I2C_AMD8111=y # # Misc devices # +# CONFIG_TIFM_CORE is not set # # Multimedia devices @@ -779,7 +797,6 @@ CONFIG_USB_UHCI_HCD=y # # may also be needed; see USB_STORAGE Help for more information # -# CONFIG_USB_STORAGE is not set # CONFIG_USB_LIBUSUAL is not set # @@ -802,6 +819,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_ATI_REMOTE2 is not set # CONFIG_USB_KEYSPAN_REMOTE is not set # CONFIG_USB_APPLETOUCH is not set +# CONFIG_USB_TRANCEVIBRATOR is not set # # USB Imaging devices @@ -828,6 +846,7 @@ CONFIG_USB_MON=y CONFIG_USB_SERIAL=y # CONFIG_USB_SERIAL_CONSOLE is not set CONFIG_USB_SERIAL_GENERIC=y +# CONFIG_USB_SERIAL_AIRCABLE is not set # CONFIG_USB_SERIAL_AIRPRIME is not set # CONFIG_USB_SERIAL_ARK3116 is not set # CONFIG_USB_SERIAL_BELKIN is not set @@ -862,6 +881,7 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y # CONFIG_USB_SERIAL_KLSI is not set # CONFIG_USB_SERIAL_KOBIL_SCT is not set # CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_MOS7840 is not set # CONFIG_USB_SERIAL_NAVMAN is not set # CONFIG_USB_SERIAL_PL2303 is not set # CONFIG_USB_SERIAL_HP4X is not set @@ -879,6 +899,7 @@ CONFIG_USB_EZUSB=y # # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set # CONFIG_USB_AUERSWALD is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set @@ -886,9 +907,9 @@ CONFIG_USB_EZUSB=y # CONFIG_USB_LED is not set # CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGETKIT is not set -# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set # CONFIG_USB_APPLEDISPLAY is not set # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set @@ -995,8 +1016,10 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y @@ -1129,6 +1152,7 @@ CONFIG_PLIST=y # Kernel hacking # # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_MUST_CHECK=y CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y @@ -1148,6 +1172,7 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_LIST is not set # CONFIG_FORCED_INLINING is not set # CONFIG_RCU_TORTURE_TEST is not set CONFIG_DEBUG_STACKOVERFLOW=y @@ -1169,6 +1194,9 @@ CONFIG_BOOTX_TEXT=y # Cryptographic options # CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_MANAGER=m # CONFIG_CRYPTO_HMAC is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_MD4 is not set @@ -1178,6 +1206,8 @@ CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_TGR192 is not set +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_TWOFISH is not set diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 44175fb7adec..9828663652e9 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -506,7 +506,7 @@ CONFIG_SCSI_SAS_ATTRS=m # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set -# CONFIG_ATA is not set +CONFIG_ATA=y # CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 124dbcba94a8..39db7a3affe1 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -319,7 +319,7 @@ EXPORT_SYMBOL(ibmebus_unregister_driver); int ibmebus_request_irq(struct ibmebus_dev *dev, u32 ist, - irqreturn_t (*handler)(int, void*, struct pt_regs *), + irq_handler_t handler, unsigned long irq_flags, const char * devname, void *dev_id) { diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ba0694071728..f88a2a675d90 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -75,7 +75,7 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, /* This allocator was derived from x86_64's bit string search */ /* Sanity check */ - if (unlikely(npages) == 0) { + if (unlikely(npages == 0)) { if (printk_ratelimit()) WARN_ON(1); return DMA_ERROR_CODE; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5deaab3090b4..5e37bf14ef2d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -217,7 +217,7 @@ void do_IRQ(struct pt_regs *regs) * The value -2 is for buggy hardware and means that this IRQ * has already been handled. -- Tom */ - irq = ppc_md.get_irq(regs); + irq = ppc_md.get_irq(); if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { #ifdef CONFIG_IRQSTACKS @@ -572,8 +572,8 @@ unsigned int irq_create_mapping(struct irq_host *host, } EXPORT_SYMBOL_GPL(irq_create_mapping); -extern unsigned int irq_create_of_mapping(struct device_node *controller, - u32 *intspec, unsigned int intsize) +unsigned int irq_create_of_mapping(struct device_node *controller, + u32 *intspec, unsigned int intsize) { struct irq_host *host; irq_hw_number_t hwirq; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index eb913f80bfb1..865b9648d0d5 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -724,7 +724,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node, strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); #ifdef CONFIG_CMDLINE - if (l == 0 || (l == 1 && (*p) == 0)) + if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); #endif /* CONFIG_CMDLINE */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index d210d0a5006b..5b59bc18dfe7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -706,7 +706,7 @@ void timer_interrupt(struct pt_regs * regs) #ifdef CONFIG_PPC_ISERIES if (hvlpevent_is_pending()) - process_hvlpevents(regs); + process_hvlpevents(); #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 16fe027bbc12..d1c0758c5611 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -307,11 +307,12 @@ void __init paging_init(void) top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_HIGHMEM - max_zone_pfns[0] = total_lowmem >> PAGE_SHIFT; - max_zone_pfns[1] = top_of_ram >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; + max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; #else - max_zone_pfns[0] = top_of_ram >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; #endif free_area_init_nodes(max_zone_pfns); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 43c272075e1a..9da01dc8cfd9 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -617,9 +617,9 @@ void __init do_init_bootmem(void) void __init paging_init(void) { - unsigned long max_zone_pfns[MAX_NR_ZONES] = { - lmb_end_of_DRAM() >> PAGE_SHIFT - }; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT; free_area_init_nodes(max_zone_pfns); } diff --git a/arch/powerpc/platforms/82xx/mpc82xx.c b/arch/powerpc/platforms/82xx/mpc82xx.c index 89d702de4863..0f5b30dc60da 100644 --- a/arch/powerpc/platforms/82xx/mpc82xx.c +++ b/arch/powerpc/platforms/82xx/mpc82xx.c @@ -11,7 +11,6 @@ * option) any later version. */ -#include <linux/config.h> #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/arch/powerpc/platforms/82xx/mpc82xx_ads.c b/arch/powerpc/platforms/82xx/mpc82xx_ads.c index 4276f087f26e..bb9acbb98176 100644 --- a/arch/powerpc/platforms/82xx/mpc82xx_ads.c +++ b/arch/powerpc/platforms/82xx/mpc82xx_ads.c @@ -12,8 +12,6 @@ * option) any later version. */ - -#include <linux/config.h> #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/init.h> @@ -384,8 +382,7 @@ struct hw_interrupt_type m82xx_pci_ic = { }; static void -m82xx_pci_irq_demux(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +m82xx_pci_irq_demux(unsigned int irq, struct irq_desc *desc) { unsigned long stat, mask, pend; int bit; @@ -398,7 +395,7 @@ m82xx_pci_irq_demux(unsigned int irq, struct irq_desc *desc, break; for (bit = 0; pend != 0; ++bit, pend <<= 1) { if (pend & 0x80000000) - __do_IRQ(pci_int_base + bit, regs); + __do_IRQ(pci_int_base + bit); } } } diff --git a/arch/powerpc/platforms/82xx/pq2ads.h b/arch/powerpc/platforms/82xx/pq2ads.h index a7348213508f..fb2f92bcd770 100644 --- a/arch/powerpc/platforms/82xx/pq2ads.h +++ b/arch/powerpc/platforms/82xx/pq2ads.h @@ -22,8 +22,6 @@ #ifndef __MACH_ADS8260_DEFS #define __MACH_ADS8260_DEFS -#include <linux/config.h> - #include <asm/ppcboot.h> /* For our show_cpuinfo hooks. */ diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c index 8af7126fc6b9..d3e669d69c73 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -66,12 +66,11 @@ mpc85xx_pcibios_fixup(void) #ifdef CONFIG_CPM2 -static void cpm2_cascade(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +static void cpm2_cascade(unsigned int irq, struct irq_desc *desc) { int cascade_irq; - while ((cascade_irq = cpm2_get_irq(regs)) >= 0) { + while ((cascade_irq = cpm2_get_irq()) >= 0) { generic_handle_irq(cascade_irq); } desc->chip->eoi(irq); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index b92fc6976a47..953cd5dd3f54 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -132,10 +132,9 @@ mpc85xx_cds_pcibios_fixup(void) #ifdef CONFIG_PPC_I8259 #warning The i8259 PIC support is currently broken -static void mpc85xx_8259_cascade(unsigned int irq, struct - irq_desc *desc, struct pt_regs *regs) +static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc) { - unsigned int cascade_irq = i8259_irq(regs); + unsigned int cascade_irq = i8259_irq(); if (cascade_irq != NO_IRQ) generic_handle_irq(cascade_irq); @@ -150,8 +149,10 @@ void __init mpc85xx_cds_pic_init(void) struct mpic *mpic; struct resource r; struct device_node *np = NULL; +#ifdef CONFIG_PPC_I8259 struct device_node *cascade_node = NULL; int cascade_irq; +#endif np = of_find_node_by_type(np, "open-pic"); diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index 8218703babde..1a1c226ad4d9 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -53,10 +53,9 @@ unsigned long pci_dram_offset = 0; #ifdef CONFIG_PCI -static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc) { - unsigned int cascade_irq = i8259_irq(regs); + unsigned int cascade_irq = i8259_irq(); if (cascade_irq != NO_IRQ) generic_handle_irq(cascade_irq); desc->chip->eoi(irq); diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index f8768b096f02..a914c12b4060 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -98,8 +98,7 @@ static void iic_ioexc_eoi(unsigned int irq) { } -static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc) { struct cbe_iic_regs __iomem *node_iic = (void __iomem *)desc->handler_data; unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; @@ -140,7 +139,7 @@ static struct irq_chip iic_ioexc_chip = { }; /* Get an IRQ number from the pending state register of the IIC */ -static unsigned int iic_get_irq(struct pt_regs *regs) +static unsigned int iic_get_irq(void) { struct cbe_iic_pending_bits pending; struct iic *iic; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index ac5f12662dbb..ccfd0c4db874 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -147,7 +147,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) } static irqreturn_t -spu_irq_class_0(int irq, void *data, struct pt_regs *regs) +spu_irq_class_0(int irq, void *data) { struct spu *spu; @@ -186,7 +186,7 @@ spu_irq_class_0_bottom(struct spu *spu) EXPORT_SYMBOL_GPL(spu_irq_class_0_bottom); static irqreturn_t -spu_irq_class_1(int irq, void *data, struct pt_regs *regs) +spu_irq_class_1(int irq, void *data) { struct spu *spu; unsigned long stat, mask, dar, dsisr; @@ -224,7 +224,7 @@ spu_irq_class_1(int irq, void *data, struct pt_regs *regs) EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom); static irqreturn_t -spu_irq_class_2(int irq, void *data, struct pt_regs *regs) +spu_irq_class_2(int irq, void *data) { struct spu *spu; unsigned long stat; diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 35cd7a5f6834..cae3d13229b9 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -70,7 +70,7 @@ unsigned long event_scan_interval; * has to include <linux/interrupt.h> (to get irqreturn_t), which * causes all sorts of problems. -- paulus */ -extern irqreturn_t xmon_irq(int, void *, struct pt_regs *); +extern irqreturn_t xmon_irq(int, void *); extern unsigned long loops_per_jiffy; @@ -335,10 +335,9 @@ chrp_event_scan(unsigned long unused) jiffies + event_scan_interval); } -static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc) { - unsigned int cascade_irq = i8259_irq(regs); + unsigned int cascade_irq = i8259_irq(); if (cascade_irq != NO_IRQ) generic_handle_irq(cascade_irq); desc->chip->eoi(irq); diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index cb6f084844f2..bdb475c65cba 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -61,8 +61,7 @@ pci_dram_offset = MPC7448_HPC2_PCI_MEM_OFFSET; extern int tsi108_setup_pci(struct device_node *dev); extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); extern void tsi108_pci_int_init(void); -extern void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs); +extern void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc); int mpc7448_hpc2_exclude_device(u_char bus, u_char devfn) { @@ -200,7 +199,7 @@ static void __init mpc7448_hpc2_init_IRQ(void) tsi_pic = of_find_node_by_type(NULL, "open-pic"); if (tsi_pic) { unsigned int size; - void *prop = get_property(tsi_pic, "reg", &size); + const void *prop = get_property(tsi_pic, "reg", &size); mpic_paddr = of_translate_address(tsi_pic, prop); } diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index e32446877e78..5225abfafd9b 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -43,10 +43,7 @@ #include "irq.h" #include "pci.h" #include "call_pci.h" - -#if defined(CONFIG_SMP) -extern void iSeries_smp_message_recv(struct pt_regs *); -#endif +#include "smp.h" #ifdef CONFIG_PCI @@ -88,7 +85,7 @@ static DEFINE_SPINLOCK(pending_irqs_lock); static int num_pending_irqs; static int pending_irqs[NR_IRQS]; -static void int_received(struct pci_event *event, struct pt_regs *regs) +static void int_received(struct pci_event *event) { int irq; @@ -146,11 +143,11 @@ static void int_received(struct pci_event *event, struct pt_regs *regs) } } -static void pci_event_handler(struct HvLpEvent *event, struct pt_regs *regs) +static void pci_event_handler(struct HvLpEvent *event) { if (event && (event->xType == HvLpEvent_Type_PciIo)) { if (hvlpevent_is_int(event)) - int_received((struct pci_event *)event, regs); + int_received((struct pci_event *)event); else printk(KERN_ERR "pci_event_handler: unexpected ack received\n"); @@ -308,18 +305,18 @@ int __init iSeries_allocate_IRQ(HvBusNumber bus, /* * Get the next pending IRQ. */ -unsigned int iSeries_get_irq(struct pt_regs *regs) +unsigned int iSeries_get_irq(void) { int irq = NO_IRQ_IGNORE; #ifdef CONFIG_SMP if (get_lppaca()->int_dword.fields.ipi_cnt) { get_lppaca()->int_dword.fields.ipi_cnt = 0; - iSeries_smp_message_recv(regs); + iSeries_smp_message_recv(); } #endif /* CONFIG_SMP */ if (hvlpevent_is_pending()) - process_hvlpevents(regs); + process_hvlpevents(); #ifdef CONFIG_PCI if (num_pending_irqs) { diff --git a/arch/powerpc/platforms/iseries/irq.h b/arch/powerpc/platforms/iseries/irq.h index 1ee8985140e5..69f1b437fc7b 100644 --- a/arch/powerpc/platforms/iseries/irq.h +++ b/arch/powerpc/platforms/iseries/irq.h @@ -4,6 +4,6 @@ extern void iSeries_init_IRQ(void); extern int iSeries_allocate_IRQ(HvBusNumber, HvSubBusNumber, u32); extern void iSeries_activate_IRQs(void); -extern unsigned int iSeries_get_irq(struct pt_regs *); +extern unsigned int iSeries_get_irq(void); #endif /* _ISERIES_IRQ_H */ diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c index 98c1c2440aad..e3e929e1b460 100644 --- a/arch/powerpc/platforms/iseries/lpevents.c +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -116,7 +116,7 @@ static void hvlpevent_clear_valid(struct HvLpEvent * event) hvlpevent_invalidate(event); } -void process_hvlpevents(struct pt_regs *regs) +void process_hvlpevents(void) { struct HvLpEvent * event; @@ -144,7 +144,7 @@ void process_hvlpevents(struct pt_regs *regs) __get_cpu_var(hvlpevent_counts)[event->xType]++; if (event->xType < HvLpEvent_Type_NumTypes && lpEventHandler[event->xType]) - lpEventHandler[event->xType](event, regs); + lpEventHandler[event->xType](event); else printk(KERN_INFO "Unexpected Lp Event type=%d\n", event->xType ); diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 1983b640bac1..b5737d68d6c4 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -513,7 +513,7 @@ static void handle_ack(struct io_mf_lp_event *event) * parse it enough to know if it is an interrupt or an * acknowledge. */ -static void hv_handler(struct HvLpEvent *event, struct pt_regs *regs) +static void hv_handler(struct HvLpEvent *event) { if ((event != NULL) && (event->xType == HvLpEvent_Type_MachineFac)) { if (hvlpevent_is_ack(event)) @@ -847,7 +847,7 @@ static int mf_get_boot_rtc(struct rtc_time *tm) /* We need to poll here as we are not yet taking interrupts */ while (rtc_data.busy) { if (hvlpevent_is_pending()) - process_hvlpevents(NULL); + process_hvlpevents(); } return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm); } diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c index 2eb095edb472..aee5908df700 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -43,9 +43,11 @@ #include <asm/cputable.h> #include <asm/system.h> +#include "smp.h" + static unsigned long iSeries_smp_message[NR_CPUS]; -void iSeries_smp_message_recv(struct pt_regs *regs) +void iSeries_smp_message_recv(void) { int cpu = smp_processor_id(); int msg; @@ -55,7 +57,7 @@ void iSeries_smp_message_recv(struct pt_regs *regs) for (msg = 0; msg < 4; msg++) if (test_and_clear_bit(msg, &iSeries_smp_message[cpu])) - smp_message_recv(msg, regs); + smp_message_recv(msg); } static inline void smp_iSeries_do_message(int cpu, int msg) diff --git a/arch/powerpc/platforms/iseries/smp.h b/arch/powerpc/platforms/iseries/smp.h new file mode 100644 index 000000000000..d501f7de01e7 --- /dev/null +++ b/arch/powerpc/platforms/iseries/smp.h @@ -0,0 +1,6 @@ +#ifndef _PLATFORMS_ISERIES_SMP_H +#define _PLATFORMS_ISERIES_SMP_H + +extern void iSeries_smp_message_recv(void); + +#endif /* _PLATFORMS_ISERIES_SMP_H */ diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c index 9baa4ee82592..04e07e5da0c1 100644 --- a/arch/powerpc/platforms/iseries/viopath.c +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -378,7 +378,7 @@ void vio_set_hostlp(void) } EXPORT_SYMBOL(vio_set_hostlp); -static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs) +static void vio_handleEvent(struct HvLpEvent *event) { HvLpIndex remoteLp; int subtype = (event->xSubtype & VIOMAJOR_SUBTYPE_MASK) diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index 1b827618e05f..63b4d1bff359 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -8,7 +8,7 @@ * 2 of the License, or (at your option) any later version. */ -#define DEBUG +#undef DEBUG #include <linux/kernel.h> #include <linux/pci.h> @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/irq.h> #include <asm/sections.h> #include <asm/io.h> @@ -33,7 +34,7 @@ #define DBG(x...) #endif -static struct pci_controller *u3_agp, *u3_ht; +static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; static int __init fixup_one_level_bus_range(struct device_node *node, int higher) { @@ -287,6 +288,114 @@ static struct pci_ops u3_ht_pci_ops = u3_ht_write_config }; +static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off) +{ + return (1 << PCI_SLOT(devfn)) | + (PCI_FUNC(devfn) << 8) | + ((off >> 8) << 28) | + (off & 0xfcu); +} + +static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn, + unsigned int off) +{ + return (bus << 16) | + (devfn << 8) | + ((off >> 8) << 28) | + (off & 0xfcu) | 1u; +} + +static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, int offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) + caddr = u4_pcie_cfa0(dev_fn, offset); + else + caddr = u4_pcie_cfa1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x03; + return hose->cfg_data + offset; +} + +static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} +static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + (void) in_8(addr); + break; + case 2: + out_le16(addr, val); + (void) in_le16(addr); + break; + default: + out_le32(addr, val); + (void) in_le32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u4_pcie_pci_ops = +{ + u4_pcie_read_config, + u4_pcie_write_config +}; + static void __init setup_u3_agp(struct pci_controller* hose) { /* On G5, we move AGP up to high bus number so we don't need @@ -307,6 +416,26 @@ static void __init setup_u3_agp(struct pci_controller* hose) u3_agp = hose; } +static void __init setup_u4_pcie(struct pci_controller* hose) +{ + /* We currently only implement the "non-atomic" config space, to + * be optimised later. + */ + hose->ops = &u4_pcie_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + /* The bus contains a bridge from root -> device, we need to + * make it visible on bus 0 so that we pick the right type + * of config cycles. If we didn't, we would have to force all + * config cycles to be type 1. So we override the "bus-range" + * property here + */ + hose->first_busno = 0x00; + hose->last_busno = 0xff; + u4_pcie = hose; +} + static void __init setup_u3_ht(struct pci_controller* hose) { hose->ops = &u3_ht_pci_ops; @@ -354,6 +483,10 @@ static int __init add_bridge(struct device_node *dev) setup_u3_ht(hose); disp_name = "U3-HT"; primary = 1; + } else if (device_is_compatible(dev, "u4-pcie")) { + setup_u4_pcie(hose); + disp_name = "U4-PCIE"; + primary = 0; } printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", disp_name, hose->first_busno, hose->last_busno); @@ -361,7 +494,6 @@ static int __init add_bridge(struct device_node *dev) /* Interpret the "ranges" property */ /* This also maps the I/O region and sets isa_io/mem_base */ pci_process_bridge_OF_ranges(hose, dev, primary); - pci_setup_phb_io(hose, primary); /* Fixup "bus-range" OF property */ fixup_bus_range(dev); @@ -376,8 +508,30 @@ void __init maple_pcibios_fixup(void) DBG(" -> maple_pcibios_fixup\n"); - for_each_pci_dev(dev) + for_each_pci_dev(dev) { + /* Fixup IRQ for PCIe host */ + if (u4_pcie != NULL && dev->bus->number == 0 && + pci_bus_to_host(dev->bus) == u4_pcie) { + printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n"); + dev->irq = irq_create_mapping(NULL, 1); + if (dev->irq != NO_IRQ) + set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); + continue; + } + + /* Hide AMD8111 IDE interrupt when in legacy mode so + * the driver calls pci_get_legacy_ide_irq() + */ + if (dev->vendor == PCI_VENDOR_ID_AMD && + dev->device == PCI_DEVICE_ID_AMD_8111_IDE && + (dev->class & 5) != 5) { + dev->irq = NO_IRQ; + continue; + } + + /* For all others, map the interrupt from the device-tree */ pci_read_irq_line(dev); + } DBG(" <- maple_pcibios_fixup\n"); } @@ -388,8 +542,10 @@ static void __init maple_fixup_phb_resources(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; + hose->io_resource.start += offset; hose->io_resource.end += offset; + printk(KERN_INFO "PCI Host %d, io start: %llx; io end: %llx\n", hose->global_number, (unsigned long long)hose->io_resource.start, @@ -431,6 +587,19 @@ void __init maple_pci_init(void) if (ht && add_bridge(ht) != 0) of_node_put(ht); + /* + * We need to call pci_setup_phb_io for the HT bridge first + * so it gets the I/O port numbers starting at 0, and we + * need to call it for the AGP bridge after that so it gets + * small positive I/O port numbers. + */ + if (u3_ht) + pci_setup_phb_io(u3_ht, 1); + if (u3_agp) + pci_setup_phb_io(u3_agp, 0); + if (u4_pcie) + pci_setup_phb_io(u4_pcie, 0); + /* Fixup the IO resources on our host bridges as the common code * does it only for childs of the host bridges */ @@ -465,8 +634,11 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) return defirq; np = pci_device_to_OF_node(pdev); - if (np == NULL) + if (np == NULL) { + printk("Failed to locate OF node for IDE %s\n", + pci_name(pdev)); return defirq; + } irq = irq_of_parse_and_map(np, channel & 0x1); if (irq == NO_IRQ) { printk("Failed to map onboard IDE interrupt for channel %d\n", @@ -479,6 +651,9 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) /* XXX: To remove once all firmwares are ok */ static void fixup_maple_ide(struct pci_dev* dev) { + if (!machine_is(maple)) + return; + #if 0 /* Enable this to enable IDE port 0 */ { u8 v; @@ -495,7 +670,7 @@ static void fixup_maple_ide(struct pci_dev* dev) dev->resource[4].start = 0xcc00; dev->resource[4].end = 0xcc10; #endif -#if 1 /* Enable this to fixup IDE sense/polarity of irqs in IO-APICs */ +#if 0 /* Enable this to fixup IDE sense/polarity of irqs in IO-APICs */ { struct pci_dev *apicdev; u32 v; diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index 4679c5230413..39020c1fa13d 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -35,17 +35,17 @@ #define CONFIG_OFFSET_VALID(off) ((off) < 4096) -static unsigned long pa_pxp_cfg_addr(struct pci_controller *hose, +static void volatile __iomem *pa_pxp_cfg_addr(struct pci_controller *hose, u8 bus, u8 devfn, int offset) { - return ((unsigned long)hose->cfg_data) + PA_PXP_CFA(bus, devfn, offset); + return hose->cfg_data + PA_PXP_CFA(bus, devfn, offset); } static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 *val) { struct pci_controller *hose; - unsigned long addr; + void volatile __iomem *addr; hose = pci_bus_to_host(bus); if (!hose) @@ -62,13 +62,13 @@ static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn, */ switch (len) { case 1: - *val = in_8((u8 *)addr); + *val = in_8(addr); break; case 2: - *val = in_le16((u16 *)addr); + *val = in_le16(addr); break; default: - *val = in_le32((u32 *)addr); + *val = in_le32(addr); break; } @@ -79,7 +79,7 @@ static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val) { struct pci_controller *hose; - unsigned long addr; + void volatile __iomem *addr; hose = pci_bus_to_host(bus); if (!hose) @@ -96,16 +96,16 @@ static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn, */ switch (len) { case 1: - out_8((u8 *)addr, val); - (void) in_8((u8 *)addr); + out_8(addr, val); + (void) in_8(addr); break; case 2: - out_le16((u16 *)addr, val); - (void) in_le16((u16 *)addr); + out_le16(addr, val); + (void) in_le16(addr); break; default: - out_le32((u32 *)addr, val); - (void) in_le32((u32 *)addr); + out_le32(addr, val); + (void) in_le32(addr); break; } return PCIBIOS_SUCCESSFUL; diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 5da677835c00..39db12890214 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -42,7 +42,7 @@ * has to include <linux/interrupt.h> (to get irqreturn_t), which * causes all sorts of problems. -- paulus */ -extern irqreturn_t xmon_irq(int, void *, struct pt_regs *); +extern irqreturn_t xmon_irq(int, void *); #ifdef CONFIG_PPC32 struct pmac_irq_hw { @@ -210,7 +210,7 @@ static struct irq_chip pmac_pic = { .retrigger = pmac_retrigger, }; -static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs) +static irqreturn_t gatwick_action(int cpl, void *dev_id) { unsigned long flags; int irq, bits; @@ -235,18 +235,18 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs) return rc; } -static unsigned int pmac_pic_get_irq(struct pt_regs *regs) +static unsigned int pmac_pic_get_irq(void) { int irq; unsigned long bits = 0; unsigned long flags; #ifdef CONFIG_SMP - void psurge_smp_message_recv(struct pt_regs *); + void psurge_smp_message_recv(void); /* IPI's are a hack on the powersurge -- Cort */ if ( smp_processor_id() != 0 ) { - psurge_smp_message_recv(regs); + psurge_smp_message_recv(); return NO_IRQ_IGNORE; /* ignore, already handled */ } #endif /* CONFIG_SMP */ @@ -444,7 +444,7 @@ static void pmac_u3_cascade(unsigned int irq, struct irq_desc *desc) { struct mpic *mpic = desc->handler_data; - unsigned int cascade_irq = mpic_get_one_irq(mpic, get_irq_regs()); + unsigned int cascade_irq = mpic_get_one_irq(mpic); if (cascade_irq != NO_IRQ) generic_handle_irq(cascade_irq); desc->chip->eoi(irq); diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h index 664103dfeef9..c44c89f5e532 100644 --- a/arch/powerpc/platforms/powermac/pic.h +++ b/arch/powerpc/platforms/powermac/pic.h @@ -5,7 +5,7 @@ extern struct hw_interrupt_type pmac_pic; -void pmac_pic_init(void); -int pmac_get_irq(struct pt_regs *regs); +extern void pmac_pic_init(void); +extern int pmac_get_irq(void); #endif /* __PPC_PLATFORMS_PMAC_PIC_H */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 1949b657b092..eeb2ae5ffc58 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -160,7 +160,7 @@ static inline void psurge_clr_ipi(int cpu) */ static unsigned long psurge_smp_message[NR_CPUS]; -void psurge_smp_message_recv(struct pt_regs *regs) +void psurge_smp_message_recv(void) { int cpu = smp_processor_id(); int msg; @@ -174,12 +174,12 @@ void psurge_smp_message_recv(struct pt_regs *regs) /* make sure there is a message there */ for (msg = 0; msg < 4; msg++) if (test_and_clear_bit(msg, &psurge_smp_message[cpu])) - smp_message_recv(msg, regs); + smp_message_recv(msg); } -irqreturn_t psurge_primary_intr(int irq, void *d, struct pt_regs *regs) +irqreturn_t psurge_primary_intr(int irq, void *d) { - psurge_smp_message_recv(regs); + psurge_smp_message_recv(); return IRQ_HANDLED; } @@ -328,6 +328,7 @@ static void __init smp_psurge_kick_cpu(int nr) { unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8; unsigned long a; + int i; /* may need to flush here if secondary bats aren't setup */ for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32) @@ -340,7 +341,11 @@ static void __init smp_psurge_kick_cpu(int nr) mb(); psurge_set_ipi(nr); - udelay(10); + /* + * We can't use udelay here because the timebase is now frozen. + */ + for (i = 0; i < 2000; ++i) + barrier(); psurge_clr_ipi(nr); if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index bbf2e34dc358..d24ba547e53f 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -267,7 +267,8 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table *tbl) { struct device_node *node; - const unsigned long *basep, *sizep; + const unsigned long *basep; + const u32 *sizep; node = (struct device_node *)phb->arch_data; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ad9aec2c6fee..89a8119f988d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -123,7 +123,7 @@ static void __init fwnmi_init(void) void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc) { - unsigned int cascade_irq = i8259_irq(get_irq_regs()); + unsigned int cascade_irq = i8259_irq(); if (cascade_irq != NO_IRQ) generic_handle_irq(cascade_irq); desc->chip->eoi(irq); diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index f6bd2f285153..d071abe78ab1 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -308,14 +308,14 @@ static inline unsigned int xics_remap_irq(unsigned int vec) return NO_IRQ; } -static unsigned int xics_get_irq_direct(struct pt_regs *regs) +static unsigned int xics_get_irq_direct(void) { unsigned int cpu = smp_processor_id(); return xics_remap_irq(direct_xirr_info_get(cpu)); } -static unsigned int xics_get_irq_lpar(struct pt_regs *regs) +static unsigned int xics_get_irq_lpar(void) { unsigned int cpu = smp_processor_id(); diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index 28b018994746..767ee6651adc 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -147,7 +147,7 @@ static struct irq_chip cpm2_pic = { .end = cpm2_end_irq, }; -unsigned int cpm2_get_irq(struct pt_regs *regs) +unsigned int cpm2_get_irq(void) { int irq; unsigned long bits; diff --git a/arch/powerpc/sysdev/cpm2_pic.h b/arch/powerpc/sysdev/cpm2_pic.h index 3c513e5a688e..2840616529e4 100644 --- a/arch/powerpc/sysdev/cpm2_pic.h +++ b/arch/powerpc/sysdev/cpm2_pic.h @@ -3,7 +3,7 @@ extern intctl_cpm2_t *cpm2_intctl; -extern unsigned int cpm2_get_irq(struct pt_regs *regs); +extern unsigned int cpm2_get_irq(void); extern void cpm2_pic_init(struct device_node*); diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 7d759f1c26b1..dbe92ae20333 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -567,7 +567,7 @@ static int __init fs_enet_of_init(void) struct resource r[4]; struct device_node *phy, *mdio; struct fs_platform_info fs_enet_data; - const unsigned int *id, *phy_addr, phy_irq; + const unsigned int *id, *phy_addr, *phy_irq; const void *mac_addr; const phandle *ph; const char *model; @@ -641,7 +641,7 @@ static int __init fs_enet_of_init(void) if (strstr(model, "FCC")) { int fcc_index = *id - 1; - unsigned char* mdio_bb_prop; + const unsigned char *mdio_bb_prop; fs_enet_data.dpram_offset = (u32)cpm_dpram_addr(0); fs_enet_data.rx_ring = 32; @@ -708,8 +708,9 @@ static int __init fs_enet_of_init(void) ret = platform_device_add_data(fs_enet_dev, &fs_enet_data, sizeof(struct fs_platform_info)); - if (ret) - goto unreg; + if (ret) + goto unreg; + } } return 0; diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 26a6a3becd66..0450265d73bb 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -34,7 +34,7 @@ static struct irq_host *i8259_host; * which is called. It should be noted that polling is broken on some * IBM and Motorola PReP boxes so we must use the int-ack feature on them. */ -unsigned int i8259_irq(struct pt_regs *regs) +unsigned int i8259_irq(void) { int irq; int lock = 0; diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 6ebdae8e6f69..bc4d4a7f9657 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -709,7 +709,7 @@ void ipic_clear_mcp_status(u32 mask) } /* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ -unsigned int ipic_get_irq(struct pt_regs *regs) +unsigned int ipic_get_irq(void) { int irq; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 195215560fd7..ba4833f57d47 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1217,7 +1217,7 @@ void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); } -unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) +unsigned int mpic_get_one_irq(struct mpic *mpic) { u32 src; @@ -1230,13 +1230,13 @@ unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) return irq_linear_revmap(mpic->irqhost, src); } -unsigned int mpic_get_irq(struct pt_regs *regs) +unsigned int mpic_get_irq(void) { struct mpic *mpic = mpic_primary; BUG_ON(mpic == NULL); - return mpic_get_one_irq(mpic, regs); + return mpic_get_one_irq(mpic); } diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 0dec010bcbb5..6995f51b9488 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -300,7 +300,7 @@ static struct irq_host_ops qe_ic_host_ops = { }; /* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ -unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic, struct pt_regs *regs) +unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) { int irq; @@ -316,7 +316,7 @@ unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic, struct pt_regs *regs) } /* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ -unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic, struct pt_regs *regs) +unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic) { int irq; @@ -333,13 +333,12 @@ unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic, struct pt_regs *regs) /* FIXME: We mask all the QE Low interrupts while handling. We should * let other interrupt come in, but BAD interrupts are generated */ -void fastcall qe_ic_cascade_low(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +void fastcall qe_ic_cascade_low(unsigned int irq, struct irq_desc *desc) { struct qe_ic *qe_ic = desc->handler_data; struct irq_chip *chip = irq_desc[irq].chip; - unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic, regs); + unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); chip->mask_ack(irq); if (cascade_irq != NO_IRQ) @@ -349,13 +348,12 @@ void fastcall qe_ic_cascade_low(unsigned int irq, struct irq_desc *desc, /* FIXME: We mask all the QE High interrupts while handling. We should * let other interrupt come in, but BAD interrupts are generated */ -void fastcall qe_ic_cascade_high(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +void fastcall qe_ic_cascade_high(unsigned int irq, struct irq_desc *desc) { struct qe_ic *qe_ic = desc->handler_data; struct irq_chip *chip = irq_desc[irq].chip; - unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic, regs); + unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); chip->mask_ack(irq); if (cascade_irq != NO_IRQ) diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c index aea435970389..0afe6bfe3714 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_io.c +++ b/arch/powerpc/sysdev/qe_lib/qe_io.c @@ -14,7 +14,6 @@ * option) any later version. */ -#include <linux/config.h> #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 14f4a1ab6d18..322f86e93de5 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -405,8 +405,7 @@ void __init tsi108_pci_int_init(void) init_pci_source(); } -void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc) { unsigned int cascade_irq = get_pci_source(); if (cascade_irq != NO_IRQ) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 5a854f36383c..f56ffef4defa 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -36,6 +36,7 @@ #include <asm/rtas.h> #include <asm/sstep.h> #include <asm/bug.h> +#include <asm/irq_regs.h> #ifdef CONFIG_PPC64 #include <asm/hvcall.h> @@ -521,13 +522,12 @@ int xmon(struct pt_regs *excp) } EXPORT_SYMBOL(xmon); -irqreturn_t -xmon_irq(int irq, void *d, struct pt_regs *regs) +irqreturn_t xmon_irq(int irq, void *d) { unsigned long flags; local_irq_save(flags); printf("Keyboard interrupt\n"); - xmon(regs); + xmon(get_irq_regs()); local_irq_restore(flags); return IRQ_HANDLED; } diff --git a/arch/ppc/4xx_io/serial_sicc.c b/arch/ppc/4xx_io/serial_sicc.c index 87fe9a89dba7..e35483961b90 100644 --- a/arch/ppc/4xx_io/serial_sicc.c +++ b/arch/ppc/4xx_io/serial_sicc.c @@ -414,7 +414,7 @@ static void siccuart_event(struct SICC_info *info, int event) } static void -siccuart_rx_chars(struct SICC_info *info, struct pt_regs *regs) +siccuart_rx_chars(struct SICC_info *info) { struct tty_struct *tty = info->tty; unsigned int status, ch, rsr, flg, ignored = 0; @@ -441,7 +441,7 @@ siccuart_rx_chars(struct SICC_info *info, struct pt_regs *regs) #ifdef SUPPORT_SYSRQ if (info->sysrq) { if (ch && time_before(jiffies, info->sysrq)) { - handle_sysrq(ch, regs, NULL); + handle_sysrq(ch, NULL); info->sysrq = 0; goto ignore_char; } @@ -553,15 +553,15 @@ static void siccuart_tx_chars(struct SICC_info *info) } -static irqreturn_t siccuart_int_rx(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t siccuart_int_rx(int irq, void *dev_id) { struct SICC_info *info = dev_id; - siccuart_rx_chars(info, regs); + siccuart_rx_chars(info) return IRQ_HANDLED; } -static irqreturn_t siccuart_int_tx(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t siccuart_int_tx(int irq, void *dev_id) { struct SICC_info *info = dev_id; siccuart_tx_chars(info); diff --git a/arch/ppc/8260_io/enet.c b/arch/ppc/8260_io/enet.c index ac6d55fe2235..a6056c29cf00 100644 --- a/arch/ppc/8260_io/enet.c +++ b/arch/ppc/8260_io/enet.c @@ -122,7 +122,7 @@ struct scc_enet_private { static int scc_enet_open(struct net_device *dev); static int scc_enet_start_xmit(struct sk_buff *skb, struct net_device *dev); static int scc_enet_rx(struct net_device *dev); -static irqreturn_t scc_enet_interrupt(int irq, void *dev_id, struct pt_regs *); +static irqreturn_t scc_enet_interrupt(int irq, void *dev_id); static int scc_enet_close(struct net_device *dev); static struct net_device_stats *scc_enet_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -273,7 +273,7 @@ scc_enet_timeout(struct net_device *dev) * This is called from the CPM handler, not the MPC core interrupt. */ static irqreturn_t -scc_enet_interrupt(int irq, void * dev_id, struct pt_regs * regs) +scc_enet_interrupt(int irq, void * dev_id) { struct net_device *dev = dev_id; volatile struct scc_enet_private *cep; diff --git a/arch/ppc/8260_io/fcc_enet.c b/arch/ppc/8260_io/fcc_enet.c index e347fe88316d..2e1943e27819 100644 --- a/arch/ppc/8260_io/fcc_enet.c +++ b/arch/ppc/8260_io/fcc_enet.c @@ -140,7 +140,7 @@ typedef struct { static int fcc_enet_open(struct net_device *dev); static int fcc_enet_start_xmit(struct sk_buff *skb, struct net_device *dev); static int fcc_enet_rx(struct net_device *dev); -static irqreturn_t fcc_enet_interrupt(int irq, void *dev_id, struct pt_regs *); +static irqreturn_t fcc_enet_interrupt(int irq, void *dev_id); static int fcc_enet_close(struct net_device *dev); static struct net_device_stats *fcc_enet_get_stats(struct net_device *dev); /* static void set_multicast_list(struct net_device *dev); */ @@ -524,7 +524,7 @@ fcc_enet_timeout(struct net_device *dev) /* The interrupt handler. */ static irqreturn_t -fcc_enet_interrupt(int irq, void * dev_id, struct pt_regs * regs) +fcc_enet_interrupt(int irq, void * dev_id) { struct net_device *dev = dev_id; volatile struct fcc_enet_private *cep; @@ -1563,7 +1563,7 @@ mii_discover_phy(uint mii_reg, struct net_device *dev) #ifdef PHY_INTERRUPT /* This interrupt occurs when the PHY detects a link change. */ static irqreturn_t -mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs) +mii_link_interrupt(int irq, void * dev_id) { struct net_device *dev = dev_id; struct fcc_enet_private *fep = dev->priv; diff --git a/arch/ppc/8xx_io/commproc.c b/arch/ppc/8xx_io/commproc.c index 9b3ace26280c..3b23bcb35b7a 100644 --- a/arch/ppc/8xx_io/commproc.c +++ b/arch/ppc/8xx_io/commproc.c @@ -47,12 +47,12 @@ cpm8xx_t *cpmp; /* Pointer to comm processor space */ /* CPM interrupt vector functions. */ struct cpm_action { - void (*handler)(void *, struct pt_regs * regs); + void (*handler)(void *); void *dev_id; }; static struct cpm_action cpm_vecs[CPMVEC_NR]; -static irqreturn_t cpm_interrupt(int irq, void * dev, struct pt_regs * regs); -static irqreturn_t cpm_error_interrupt(int irq, void *dev, struct pt_regs * regs); +static irqreturn_t cpm_interrupt(int irq, void * dev); +static irqreturn_t cpm_error_interrupt(int irq, void *dev); static void alloc_host_memory(void); /* Define a table of names to identify CPM interrupt handlers in * /proc/interrupts. @@ -205,7 +205,7 @@ cpm_interrupt_init(void) * Get the CPM interrupt vector. */ int -cpm_get_irq(struct pt_regs *regs) +cpm_get_irq(void) { int cpm_vec; @@ -222,7 +222,7 @@ cpm_get_irq(struct pt_regs *regs) /* CPM interrupt controller cascade interrupt. */ static irqreturn_t -cpm_interrupt(int irq, void * dev, struct pt_regs * regs) +cpm_interrupt(int irq, void * dev) { /* This interrupt handler never actually gets called. It is * installed only to unmask the CPM cascade interrupt in the SIU @@ -237,7 +237,7 @@ cpm_interrupt(int irq, void * dev, struct pt_regs * regs) * tests in the interrupt handler. */ static irqreturn_t -cpm_error_interrupt(int irq, void *dev, struct pt_regs *regs) +cpm_error_interrupt(int irq, void *dev) { return IRQ_HANDLED; } @@ -246,11 +246,11 @@ cpm_error_interrupt(int irq, void *dev, struct pt_regs *regs) * request_irq() to the handler prototype required by cpm_install_handler(). */ static irqreturn_t -cpm_handler_helper(int irq, void *dev_id, struct pt_regs *regs) +cpm_handler_helper(int irq, void *dev_id) { int cpm_vec = irq - CPM_IRQ_OFFSET; - (*cpm_vecs[cpm_vec].handler)(dev_id, regs); + (*cpm_vecs[cpm_vec].handler)(dev_id); return IRQ_HANDLED; } @@ -267,8 +267,7 @@ cpm_handler_helper(int irq, void *dev_id, struct pt_regs *regs) * request_irq() or cpm_install_handler(). */ void -cpm_install_handler(int cpm_vec, void (*handler)(void *, struct pt_regs *regs), - void *dev_id) +cpm_install_handler(int cpm_vec, void (*handler)(void *), void *dev_id) { int err; diff --git a/arch/ppc/8xx_io/cs4218_tdm.c b/arch/ppc/8xx_io/cs4218_tdm.c index f5f300fc213d..959d31c26cbb 100644 --- a/arch/ppc/8xx_io/cs4218_tdm.c +++ b/arch/ppc/8xx_io/cs4218_tdm.c @@ -331,7 +331,7 @@ static int CS_SetFormat(int format); static int CS_SetVolume(int volume); static void cs4218_tdm_tx_intr(void *devid); static void cs4218_tdm_rx_intr(void *devid); -static void cs4218_intr(void *devid, struct pt_regs *regs); +static void cs4218_intr(void *devid); static int cs_get_volume(uint reg); static int cs_volume_setter(int volume, int mute); static int cs_get_gain(uint reg); @@ -2646,7 +2646,7 @@ int __init tdm8xx_sound_init(void) * full duplex operation. */ static void -cs4218_intr(void *dev_id, struct pt_regs *regs) +cs4218_intr(void *dev_id) { volatile smc_t *sp; volatile cpm8xx_t *cp; diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c index a695375c3e4c..b23c45bc151a 100644 --- a/arch/ppc/8xx_io/enet.c +++ b/arch/ppc/8xx_io/enet.c @@ -149,7 +149,7 @@ struct scc_enet_private { static int scc_enet_open(struct net_device *dev); static int scc_enet_start_xmit(struct sk_buff *skb, struct net_device *dev); static int scc_enet_rx(struct net_device *dev); -static void scc_enet_interrupt(void *dev_id, struct pt_regs *regs); +static void scc_enet_interrupt(void *dev_id); static int scc_enet_close(struct net_device *dev); static struct net_device_stats *scc_enet_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -305,7 +305,7 @@ scc_enet_timeout(struct net_device *dev) * This is called from the CPM handler, not the MPC core interrupt. */ static void -scc_enet_interrupt(void *dev_id, struct pt_regs *regs) +scc_enet_interrupt(void *dev_id) { struct net_device *dev = dev_id; volatile struct scc_enet_private *cep; diff --git a/arch/ppc/8xx_io/fec.c b/arch/ppc/8xx_io/fec.c index 8b6295bbb564..2f9fa9e3d331 100644 --- a/arch/ppc/8xx_io/fec.c +++ b/arch/ppc/8xx_io/fec.c @@ -198,8 +198,7 @@ static int fec_enet_start_xmit(struct sk_buff *skb, struct net_device *dev); #ifdef CONFIG_USE_MDIO static void fec_enet_mii(struct net_device *dev); #endif /* CONFIG_USE_MDIO */ -static irqreturn_t fec_enet_interrupt(int irq, void * dev_id, - struct pt_regs * regs); +static irqreturn_t fec_enet_interrupt(int irq, void * dev_id); #ifdef CONFIG_FEC_PACKETHOOK static void fec_enet_tx(struct net_device *dev, __u32 regval); static void fec_enet_rx(struct net_device *dev, __u32 regval); @@ -472,7 +471,7 @@ fec_timeout(struct net_device *dev) * This is called from the MPC core interrupt. */ static irqreturn_t -fec_enet_interrupt(int irq, void * dev_id, struct pt_regs * regs) +fec_enet_interrupt(int irq, void * dev_id) { struct net_device *dev = dev_id; volatile fec_t *fecp; @@ -1408,7 +1407,7 @@ static #ifdef CONFIG_RPXCLASSIC void mii_link_interrupt(void *dev_id) #else -irqreturn_t mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs) +irqreturn_t mii_link_interrupt(int irq, void * dev_id) #endif { #ifdef CONFIG_USE_MDIO diff --git a/arch/ppc/kernel/smp.c b/arch/ppc/kernel/smp.c index ca57e896a36c..96a55972b986 100644 --- a/arch/ppc/kernel/smp.c +++ b/arch/ppc/kernel/smp.c @@ -84,7 +84,7 @@ smp_message_pass(int target, int msg) /* * Common functions */ -void smp_message_recv(int msg, struct pt_regs *regs) +void smp_message_recv(int msg) { atomic_inc(&ipi_recv); @@ -100,7 +100,7 @@ void smp_message_recv(int msg, struct pt_regs *regs) break; #ifdef CONFIG_XMON case PPC_MSG_XMON_BREAK: - xmon(regs); + xmon(get_irq_regs()); break; #endif /* CONFIG_XMON */ default: diff --git a/arch/ppc/kernel/time.c b/arch/ppc/kernel/time.c index 187388625a76..d4b2cf74da6a 100644 --- a/arch/ppc/kernel/time.c +++ b/arch/ppc/kernel/time.c @@ -142,7 +142,7 @@ void timer_interrupt(struct pt_regs * regs) while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) { jiffy_stamp += tb_ticks_per_jiffy; - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); update_process_times(user_mode(regs)); if (smp_processor_id()) diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index 410200046af1..c374e53ae03a 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c @@ -374,11 +374,12 @@ void __init paging_init(void) end_pfn = start_pfn + (total_memory >> PAGE_SHIFT); add_active_range(0, start_pfn, end_pfn); + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_HIGHMEM - max_zone_pfns[0] = total_lowmem >> PAGE_SHIFT; - max_zone_pfns[1] = total_memory >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; + max_zone_pfns[ZONE_HIGHMEM] = total_memory >> PAGE_SHIFT; #else - max_zone_pfns[0] = total_memory >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = total_memory >> PAGE_SHIFT; #endif /* CONFIG_HIGHMEM */ free_area_init_nodes(max_zone_pfns); } diff --git a/arch/ppc/platforms/85xx/mpc8560_ads.c b/arch/ppc/platforms/85xx/mpc8560_ads.c index 94badafe4ef1..14ecec7bbed7 100644 --- a/arch/ppc/platforms/85xx/mpc8560_ads.c +++ b/arch/ppc/platforms/85xx/mpc8560_ads.c @@ -211,10 +211,10 @@ mpc8560ads_setup_arch(void) #endif } -static irqreturn_t cpm2_cascade(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t cpm2_cascade(int irq, void *dev_id) { - while ((irq = cpm2_get_irq(regs)) >= 0) - __do_IRQ(irq, regs); + while ((irq = cpm2_get_irq()) >= 0) + __do_IRQ(irq); return IRQ_HANDLED; } diff --git a/arch/ppc/platforms/85xx/mpc85xx_cds_common.c b/arch/ppc/platforms/85xx/mpc85xx_cds_common.c index 75204588a3e7..5ce0f69c1db6 100644 --- a/arch/ppc/platforms/85xx/mpc85xx_cds_common.c +++ b/arch/ppc/platforms/85xx/mpc85xx_cds_common.c @@ -127,10 +127,10 @@ mpc85xx_cds_show_cpuinfo(struct seq_file *m) } #ifdef CONFIG_CPM2 -static irqreturn_t cpm2_cascade(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t cpm2_cascade(int irq, void *dev_id) { - while((irq = cpm2_get_irq(regs)) >= 0) - __do_IRQ(irq, regs); + while((irq = cpm2_get_irq()) >= 0) + __do_IRQ(irq); return IRQ_HANDLED; } diff --git a/arch/ppc/platforms/85xx/stx_gp3.c b/arch/ppc/platforms/85xx/stx_gp3.c index 495aa79bb3a1..4bb18ab27672 100644 --- a/arch/ppc/platforms/85xx/stx_gp3.c +++ b/arch/ppc/platforms/85xx/stx_gp3.c @@ -156,10 +156,10 @@ gp3_setup_arch(void) printk ("bi_immr_base = %8.8lx\n", binfo->bi_immr_base); } -static irqreturn_t cpm2_cascade(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t cpm2_cascade(int irq, void *dev_id) { - while ((irq = cpm2_get_irq(regs)) >= 0) - __do_IRQ(irq, regs); + while ((irq = cpm2_get_irq()) >= 0) + __do_IRQ(irq); return IRQ_HANDLED; } diff --git a/arch/ppc/platforms/85xx/tqm85xx.c b/arch/ppc/platforms/85xx/tqm85xx.c index 189ed4175f9f..dd45f2e18449 100644 --- a/arch/ppc/platforms/85xx/tqm85xx.c +++ b/arch/ppc/platforms/85xx/tqm85xx.c @@ -181,10 +181,10 @@ tqm85xx_setup_arch(void) } #ifdef CONFIG_MPC8560 -static irqreturn_t cpm2_cascade(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t cpm2_cascade(int irq, void *dev_id) { - while ((irq = cpm2_get_irq(regs)) >= 0) - __do_IRQ(irq, regs); + while ((irq = cpm2_get_irq()) >= 0) + __do_IRQ(irq); return IRQ_HANDLED; } diff --git a/arch/ppc/platforms/apus_setup.c b/arch/ppc/platforms/apus_setup.c index 1d034ead2c9a..063274d2c503 100644 --- a/arch/ppc/platforms/apus_setup.c +++ b/arch/ppc/platforms/apus_setup.c @@ -492,7 +492,7 @@ apus_halt(void) static unsigned char last_ipl[8]; -int apus_get_irq(struct pt_regs* regs) +int apus_get_irq(void) { unsigned char ipl_emu, mask; unsigned int level; diff --git a/arch/ppc/platforms/hdpu.c b/arch/ppc/platforms/hdpu.c index e0f112a1fd0b..d809e17aa536 100644 --- a/arch/ppc/platforms/hdpu.c +++ b/arch/ppc/platforms/hdpu.c @@ -659,8 +659,7 @@ static void __init hdpu_map_io(void) char hdpu_smp0[] = "SMP Cpu #0"; char hdpu_smp1[] = "SMP Cpu #1"; -static irqreturn_t hdpu_smp_cpu0_int_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t hdpu_smp_cpu0_int_handler(int irq, void *dev_id) { volatile unsigned int doorbell; @@ -670,22 +669,21 @@ static irqreturn_t hdpu_smp_cpu0_int_handler(int irq, void *dev_id, mv64x60_write(&bh, MV64360_CPU0_DOORBELL_CLR, doorbell); if (doorbell & 1) { - smp_message_recv(0, regs); + smp_message_recv(0); } if (doorbell & 2) { - smp_message_recv(1, regs); + smp_message_recv(1); } if (doorbell & 4) { - smp_message_recv(2, regs); + smp_message_recv(2); } if (doorbell & 8) { - smp_message_recv(3, regs); + smp_message_recv(3); } return IRQ_HANDLED; } -static irqreturn_t hdpu_smp_cpu1_int_handler(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t hdpu_smp_cpu1_int_handler(int irq, void *dev_id) { volatile unsigned int doorbell; @@ -695,16 +693,16 @@ static irqreturn_t hdpu_smp_cpu1_int_handler(int irq, void *dev_id, mv64x60_write(&bh, MV64360_CPU1_DOORBELL_CLR, doorbell); if (doorbell & 1) { - smp_message_recv(0, regs); + smp_message_recv(0); } if (doorbell & 2) { - smp_message_recv(1, regs); + smp_message_recv(1); } if (doorbell & 4) { - smp_message_recv(2, regs); + smp_message_recv(2); } if (doorbell & 8) { - smp_message_recv(3, regs); + smp_message_recv(3); } return IRQ_HANDLED; } diff --git a/arch/ppc/platforms/radstone_ppc7d.c b/arch/ppc/platforms/radstone_ppc7d.c index 3bb530af0297..13d70ab50bf1 100644 --- a/arch/ppc/platforms/radstone_ppc7d.c +++ b/arch/ppc/platforms/radstone_ppc7d.c @@ -451,11 +451,11 @@ static void __init ppc7d_calibrate_decr(void) * Interrupt stuff *****************************************************************************/ -static irqreturn_t ppc7d_i8259_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ppc7d_i8259_intr(int irq, void *dev_id) { u32 temp = mv64x60_read(&bh, MV64x60_GPP_INTR_CAUSE); if (temp & (1 << 28)) { - i8259_irq(regs); + i8259_irq(); mv64x60_write(&bh, MV64x60_GPP_INTR_CAUSE, temp & (~(1 << 28))); return IRQ_HANDLED; } @@ -536,13 +536,13 @@ static u32 ppc7d_irq_canonicalize(u32 irq) return irq; } -static int ppc7d_get_irq(struct pt_regs *regs) +static int ppc7d_get_irq(void) { int irq; - irq = mv64360_get_irq(regs); + irq = mv64360_get_irq(); if (irq == (mv64360_irq_base + MV64x60_IRQ_GPP28)) - irq = i8259_irq(regs); + irq = i8259_irq(); return irq; } diff --git a/arch/ppc/platforms/sbc82xx.c b/arch/ppc/platforms/sbc82xx.c index 60b769c7f3fc..cc0935ccab7a 100644 --- a/arch/ppc/platforms/sbc82xx.c +++ b/arch/ppc/platforms/sbc82xx.c @@ -121,7 +121,7 @@ struct hw_interrupt_type sbc82xx_i8259_ic = { .end = sbc82xx_i8259_end_irq, }; -static irqreturn_t sbc82xx_i8259_demux(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sbc82xx_i8259_demux(int irq, void *dev_id) { spin_lock(&sbc82xx_i8259_lock); @@ -139,7 +139,7 @@ static irqreturn_t sbc82xx_i8259_demux(int irq, void *dev_id, struct pt_regs *re return IRQ_HANDLED; } } - __do_IRQ(NR_SIU_INTS + irq, regs); + __do_IRQ(NR_SIU_INTS + irq); return IRQ_HANDLED; } diff --git a/arch/ppc/syslib/cpc700.h b/arch/ppc/syslib/cpc700.h index 0a8a5d84390f..987e9aa0dd45 100644 --- a/arch/ppc/syslib/cpc700.h +++ b/arch/ppc/syslib/cpc700.h @@ -91,6 +91,6 @@ extern struct hw_interrupt_type cpc700_pic; extern unsigned int cpc700_irq_assigns[32][2]; extern void __init cpc700_init_IRQ(void); -extern int cpc700_get_irq(struct pt_regs *); +extern int cpc700_get_irq(void); #endif /* __PPC_SYSLIB_CPC700_H__ */ diff --git a/arch/ppc/syslib/cpc700_pic.c b/arch/ppc/syslib/cpc700_pic.c index 172aa215fdb0..d48e8f45c050 100644 --- a/arch/ppc/syslib/cpc700_pic.c +++ b/arch/ppc/syslib/cpc700_pic.c @@ -158,7 +158,7 @@ cpc700_init_IRQ(void) * Find the highest IRQ that generating an interrupt, if any. */ int -cpc700_get_irq(struct pt_regs *regs) +cpc700_get_irq(void) { int irq = 0; u_int irq_status, irq_test = 1; diff --git a/arch/ppc/syslib/cpm2_pic.c b/arch/ppc/syslib/cpm2_pic.c index c0fee0beb815..fb2d5842641a 100644 --- a/arch/ppc/syslib/cpm2_pic.c +++ b/arch/ppc/syslib/cpm2_pic.c @@ -123,7 +123,7 @@ static struct hw_interrupt_type cpm2_pic = { .end = cpm2_end_irq, }; -int cpm2_get_irq(struct pt_regs *regs) +int cpm2_get_irq(void) { int irq; unsigned long bits; diff --git a/arch/ppc/syslib/cpm2_pic.h b/arch/ppc/syslib/cpm2_pic.h index 97cab8f13a1a..467339337a78 100644 --- a/arch/ppc/syslib/cpm2_pic.h +++ b/arch/ppc/syslib/cpm2_pic.h @@ -1,7 +1,7 @@ #ifndef _PPC_KERNEL_CPM2_H #define _PPC_KERNEL_CPM2_H -extern int cpm2_get_irq(struct pt_regs *regs); +extern int cpm2_get_irq(void); extern void cpm2_init_IRQ(void); diff --git a/arch/ppc/syslib/gt64260_pic.c b/arch/ppc/syslib/gt64260_pic.c index 7fd550a7d586..e84d432c0657 100644 --- a/arch/ppc/syslib/gt64260_pic.c +++ b/arch/ppc/syslib/gt64260_pic.c @@ -110,9 +110,6 @@ gt64260_init_irq(void) * This function returns the lowest interrupt number of all interrupts that * are currently asserted. * - * Input Variable(s): - * struct pt_regs* not used - * * Output Variable(s): * None. * @@ -120,7 +117,7 @@ gt64260_init_irq(void) * int <interrupt number> or -2 (bogus interrupt) */ int -gt64260_get_irq(struct pt_regs *regs) +gt64260_get_irq(void) { int irq; int irq_gpp; @@ -229,7 +226,7 @@ gt64260_mask_irq(unsigned int irq) } static irqreturn_t -gt64260_cpu_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) +gt64260_cpu_error_int_handler(int irq, void *dev_id) { printk(KERN_ERR "gt64260_cpu_error_int_handler: %s 0x%08x\n", "Error on CPU interface - Cause regiser", @@ -250,7 +247,7 @@ gt64260_cpu_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) } static irqreturn_t -gt64260_pci_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) +gt64260_pci_error_int_handler(int irq, void *dev_id) { u32 val; unsigned int pci_bus = (unsigned int)dev_id; diff --git a/arch/ppc/syslib/i8259.c b/arch/ppc/syslib/i8259.c index eb35353af837..a43dda5a8334 100644 --- a/arch/ppc/syslib/i8259.c +++ b/arch/ppc/syslib/i8259.c @@ -28,7 +28,7 @@ static int i8259_pic_irq_offset; * which is called. It should be noted that polling is broken on some * IBM and Motorola PReP boxes so we must use the int-ack feature on them. */ -int i8259_irq(struct pt_regs *regs) +int i8259_irq(void) { int irq; diff --git a/arch/ppc/syslib/ibm440gx_common.c b/arch/ppc/syslib/ibm440gx_common.c index 4b77e6c8c87f..6ad52f4a26e1 100644 --- a/arch/ppc/syslib/ibm440gx_common.c +++ b/arch/ppc/syslib/ibm440gx_common.c @@ -119,7 +119,7 @@ static inline u32 l2c_diag(u32 addr) return mfdcr(DCRN_L2C0_DATA); } -static irqreturn_t l2c_error_handler(int irq, void* dev, struct pt_regs* regs) +static irqreturn_t l2c_error_handler(int irq, void* dev) { u32 sr = mfdcr(DCRN_L2C0_SR); if (sr & L2C_SR_CPE){ diff --git a/arch/ppc/syslib/ipic.c b/arch/ppc/syslib/ipic.c index 46801f5ec03f..10659c24b1be 100644 --- a/arch/ppc/syslib/ipic.c +++ b/arch/ppc/syslib/ipic.c @@ -601,7 +601,7 @@ void ipic_clear_mcp_status(u32 mask) } /* Return an interrupt vector or -1 if no interrupt is pending. */ -int ipic_get_irq(struct pt_regs *regs) +int ipic_get_irq(void) { int irq; diff --git a/arch/ppc/syslib/m82xx_pci.c b/arch/ppc/syslib/m82xx_pci.c index d3fa264e179e..e3b586b1ede9 100644 --- a/arch/ppc/syslib/m82xx_pci.c +++ b/arch/ppc/syslib/m82xx_pci.c @@ -117,7 +117,7 @@ struct hw_interrupt_type pq2pci_ic = { }; static irqreturn_t -pq2pci_irq_demux(int irq, void *dev_id, struct pt_regs *regs) +pq2pci_irq_demux(int irq, void *dev_id) { unsigned long stat, mask, pend; int bit; @@ -130,7 +130,7 @@ pq2pci_irq_demux(int irq, void *dev_id, struct pt_regs *regs) break; for (bit = 0; pend != 0; ++bit, pend <<= 1) { if (pend & 0x80000000) - __do_IRQ(NR_CPM_INTS + bit, regs); + __do_IRQ(NR_CPM_INTS + bit); } } diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c index 54303a7b4e69..d8d299bd1a12 100644 --- a/arch/ppc/syslib/m8xx_setup.c +++ b/arch/ppc/syslib/m8xx_setup.c @@ -169,7 +169,7 @@ abort(void) } /* A place holder for time base interrupts, if they are ever enabled. */ -irqreturn_t timebase_interrupt(int irq, void * dev, struct pt_regs * regs) +irqreturn_t timebase_interrupt(int irq, void * dev) { printk ("timebase_interrupt()\n"); diff --git a/arch/ppc/syslib/m8xx_wdt.c b/arch/ppc/syslib/m8xx_wdt.c index ac11d7bab443..fffac8cbeb51 100644 --- a/arch/ppc/syslib/m8xx_wdt.c +++ b/arch/ppc/syslib/m8xx_wdt.c @@ -21,7 +21,7 @@ static int wdt_timeout; int m8xx_has_internal_rtc = 0; -static irqreturn_t m8xx_wdt_interrupt(int, void *, struct pt_regs *); +static irqreturn_t m8xx_wdt_interrupt(int, void *); static struct irqaction m8xx_wdt_irqaction = { .handler = m8xx_wdt_interrupt, .name = "watchdog", @@ -35,7 +35,7 @@ void m8xx_wdt_reset(void) out_be16(&imap->im_siu_conf.sc_swsr, 0xaa39); /* write magic2 */ } -static irqreturn_t m8xx_wdt_interrupt(int irq, void *dev, struct pt_regs *regs) +static irqreturn_t m8xx_wdt_interrupt(int irq, void *dev) { volatile immap_t *imap = (volatile immap_t *)IMAP_ADDR; diff --git a/arch/ppc/syslib/mpc52xx_pic.c b/arch/ppc/syslib/mpc52xx_pic.c index 6425b5cee7db..af35a316544a 100644 --- a/arch/ppc/syslib/mpc52xx_pic.c +++ b/arch/ppc/syslib/mpc52xx_pic.c @@ -220,7 +220,7 @@ mpc52xx_init_irq(void) } int -mpc52xx_get_irq(struct pt_regs *regs) +mpc52xx_get_irq(void) { u32 status; int irq = -1; diff --git a/arch/ppc/syslib/mv64360_pic.c b/arch/ppc/syslib/mv64360_pic.c index 3f6d162f87cf..4b7a3338e122 100644 --- a/arch/ppc/syslib/mv64360_pic.c +++ b/arch/ppc/syslib/mv64360_pic.c @@ -55,10 +55,9 @@ static void mv64360_unmask_irq(unsigned int); static void mv64360_mask_irq(unsigned int); -static irqreturn_t mv64360_cpu_error_int_handler(int, void *, struct pt_regs *); -static irqreturn_t mv64360_sram_error_int_handler(int, void *, - struct pt_regs *); -static irqreturn_t mv64360_pci_error_int_handler(int, void *, struct pt_regs *); +static irqreturn_t mv64360_cpu_error_int_handler(int, void *); +static irqreturn_t mv64360_sram_error_int_handler(int, void *); +static irqreturn_t mv64360_pci_error_int_handler(int, void *); /* ========================== local declarations =========================== */ @@ -131,9 +130,6 @@ mv64360_init_irq(void) * This function returns the lowest interrupt number of all interrupts that * are currently asserted. * - * Input Variable(s): - * struct pt_regs* not used - * * Output Variable(s): * None. * @@ -142,7 +138,7 @@ mv64360_init_irq(void) * */ int -mv64360_get_irq(struct pt_regs *regs) +mv64360_get_irq(void) { int irq; int irq_gpp; @@ -283,7 +279,7 @@ mv64360_mask_irq(unsigned int irq) } static irqreturn_t -mv64360_cpu_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) +mv64360_cpu_error_int_handler(int irq, void *dev_id) { printk(KERN_ERR "mv64360_cpu_error_int_handler: %s 0x%08x\n", "Error on CPU interface - Cause regiser", @@ -304,7 +300,7 @@ mv64360_cpu_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) } static irqreturn_t -mv64360_sram_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) +mv64360_sram_error_int_handler(int irq, void *dev_id) { printk(KERN_ERR "mv64360_sram_error_int_handler: %s 0x%08x\n", "Error in internal SRAM - Cause register", @@ -325,7 +321,7 @@ mv64360_sram_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) } static irqreturn_t -mv64360_pci_error_int_handler(int irq, void *dev_id, struct pt_regs *regs) +mv64360_pci_error_int_handler(int irq, void *dev_id) { u32 val; unsigned int pci_bus = (unsigned int)dev_id; @@ -380,7 +376,7 @@ mv64360_register_hdlrs(void) /* Clear old errors and register CPU interface error intr handler */ mv64x60_write(&bh, MV64x60_CPU_ERR_CAUSE, 0); if ((rc = request_irq(MV64x60_IRQ_CPU_ERR + mv64360_irq_base, - mv64360_cpu_error_int_handler, IRQF_DISABLED, CPU_INTR_STR, 0))) + mv64360_cpu_error_int_handler, IRQF_DISABLED, CPU_INTR_STR, NULL))) printk(KERN_WARNING "Can't register cpu error handler: %d", rc); mv64x60_write(&bh, MV64x60_CPU_ERR_MASK, 0); @@ -389,7 +385,7 @@ mv64360_register_hdlrs(void) /* Clear old errors and register internal SRAM error intr handler */ mv64x60_write(&bh, MV64360_SRAM_ERR_CAUSE, 0); if ((rc = request_irq(MV64360_IRQ_SRAM_PAR_ERR + mv64360_irq_base, - mv64360_sram_error_int_handler,IRQF_DISABLED,SRAM_INTR_STR, 0))) + mv64360_sram_error_int_handler,IRQF_DISABLED,SRAM_INTR_STR, NULL))) printk(KERN_WARNING "Can't register SRAM error handler: %d",rc); /* Clear old errors and register PCI 0 error intr handler */ diff --git a/arch/ppc/syslib/open_pic.c b/arch/ppc/syslib/open_pic.c index aa0b95788705..18ec94733293 100644 --- a/arch/ppc/syslib/open_pic.c +++ b/arch/ppc/syslib/open_pic.c @@ -45,7 +45,7 @@ static u_int NumSources; static int open_pic_irq_offset; static volatile OpenPIC_Source __iomem *ISR[NR_IRQS]; static int openpic_cascade_irq = -1; -static int (*openpic_cascade_fn)(struct pt_regs *); +static int (*openpic_cascade_fn)(void); /* Global Operations */ static void openpic_disable_8259_pass_through(void); @@ -54,7 +54,7 @@ static void openpic_set_spurious(u_int vector); #ifdef CONFIG_SMP /* Interprocessor Interrupts */ static void openpic_initipi(u_int ipi, u_int pri, u_int vector); -static irqreturn_t openpic_ipi_action(int cpl, void *dev_id, struct pt_regs *); +static irqreturn_t openpic_ipi_action(int cpl, void *dev_id); #endif /* Timer Interrupts */ @@ -700,7 +700,7 @@ static struct irqaction openpic_cascade_irqaction = { void __init openpic_hookup_cascade(u_int irq, char *name, - int (*cascade_fn)(struct pt_regs *)) + int (*cascade_fn)(void)) { openpic_cascade_irq = irq; openpic_cascade_fn = cascade_fn; @@ -857,16 +857,16 @@ static void openpic_end_ipi(unsigned int irq_nr) { } -static irqreturn_t openpic_ipi_action(int cpl, void *dev_id, struct pt_regs *regs) +static irqreturn_t openpic_ipi_action(int cpl, void *dev_id) { - smp_message_recv(cpl-OPENPIC_VEC_IPI-open_pic_irq_offset, regs); + smp_message_recv(cpl-OPENPIC_VEC_IPI-open_pic_irq_offset); return IRQ_HANDLED; } #endif /* CONFIG_SMP */ int -openpic_get_irq(struct pt_regs *regs) +openpic_get_irq(void) { int irq = openpic_irq(); @@ -876,7 +876,7 @@ openpic_get_irq(struct pt_regs *regs) * This should move to irq.c eventually. -- paulus */ if (irq == openpic_cascade_irq && openpic_cascade_fn != NULL) { - int cirq = openpic_cascade_fn(regs); + int cirq = openpic_cascade_fn(); /* Allow for the cascade being shared with other devices */ if (cirq != -1) { diff --git a/arch/ppc/syslib/open_pic2.c b/arch/ppc/syslib/open_pic2.c index e1ff971539ea..d585207f9f77 100644 --- a/arch/ppc/syslib/open_pic2.c +++ b/arch/ppc/syslib/open_pic2.c @@ -529,7 +529,7 @@ static void openpic2_end_irq(unsigned int irq_nr) } int -openpic2_get_irq(struct pt_regs *regs) +openpic2_get_irq(void) { int irq = openpic2_irq(); diff --git a/arch/ppc/syslib/ppc403_pic.c b/arch/ppc/syslib/ppc403_pic.c index 1584c8b1229f..607ebd111d44 100644 --- a/arch/ppc/syslib/ppc403_pic.c +++ b/arch/ppc/syslib/ppc403_pic.c @@ -42,7 +42,7 @@ static struct hw_interrupt_type ppc403_aic = { }; int -ppc403_pic_get_irq(struct pt_regs *regs) +ppc403_pic_get_irq(void) { int irq; unsigned long bits; diff --git a/arch/ppc/syslib/ppc4xx_pic.c b/arch/ppc/syslib/ppc4xx_pic.c index 745685df5984..ee0da4b4b993 100644 --- a/arch/ppc/syslib/ppc4xx_pic.c +++ b/arch/ppc/syslib/ppc4xx_pic.c @@ -96,7 +96,7 @@ UIC_HANDLERS(1); UIC_HANDLERS(2); UIC_HANDLERS(3); -static int ppc4xx_pic_get_irq(struct pt_regs *regs) +static int ppc4xx_pic_get_irq(void) { u32 uic0 = mfdcr(DCRN_UIC_MSR(UIC0)); if (uic0 & UIC0_UIC1NC) @@ -125,7 +125,7 @@ UIC_HANDLERS(0); UIC_HANDLERS(1); UIC_HANDLERS(2); -static int ppc4xx_pic_get_irq(struct pt_regs *regs) +static int ppc4xx_pic_get_irq(void) { u32 uicb = mfdcr(DCRN_UIC_MSR(UICB)); if (uicb & UICB_UIC0NC) @@ -158,7 +158,7 @@ static void __init ppc4xx_pic_impl_init(void) UIC_HANDLERS(0); UIC_HANDLERS(1); -static int ppc4xx_pic_get_irq(struct pt_regs *regs) +static int ppc4xx_pic_get_irq(void) { u32 uic0 = mfdcr(DCRN_UIC_MSR(UIC0)); if (uic0 & UIC0_UIC1NC) @@ -179,7 +179,7 @@ static void __init ppc4xx_pic_impl_init(void) #define ACK_UIC0_PARENT UIC_HANDLERS(0); -static int ppc4xx_pic_get_irq(struct pt_regs *regs) +static int ppc4xx_pic_get_irq(void) { u32 uic0 = mfdcr(DCRN_UIC_MSR(UIC0)); return uic0 ? 32 - ffs(uic0) : -1; diff --git a/arch/ppc/syslib/ppc85xx_rio.c b/arch/ppc/syslib/ppc85xx_rio.c index d9b471b4d695..05b0e9415085 100644 --- a/arch/ppc/syslib/ppc85xx_rio.c +++ b/arch/ppc/syslib/ppc85xx_rio.c @@ -349,13 +349,12 @@ EXPORT_SYMBOL_GPL(rio_hw_add_outb_message); * mpc85xx_rio_tx_handler - MPC85xx outbound message interrupt handler * @irq: Linux interrupt number * @dev_instance: Pointer to interrupt-specific data - * @regs: Register context * * Handles outbound message interrupts. Executes a register outbound * mailbox event handler and acks the interrupt occurence. */ static irqreturn_t -mpc85xx_rio_tx_handler(int irq, void *dev_instance, struct pt_regs *regs) +mpc85xx_rio_tx_handler(int irq, void *dev_instance) { int osr; struct rio_mport *port = (struct rio_mport *)dev_instance; @@ -517,13 +516,12 @@ void rio_close_outb_mbox(struct rio_mport *mport, int mbox) * mpc85xx_rio_rx_handler - MPC85xx inbound message interrupt handler * @irq: Linux interrupt number * @dev_instance: Pointer to interrupt-specific data - * @regs: Register context * * Handles inbound message interrupts. Executes a registered inbound * mailbox event handler and acks the interrupt occurence. */ static irqreturn_t -mpc85xx_rio_rx_handler(int irq, void *dev_instance, struct pt_regs *regs) +mpc85xx_rio_rx_handler(int irq, void *dev_instance) { int isr; struct rio_mport *port = (struct rio_mport *)dev_instance; @@ -736,13 +734,12 @@ EXPORT_SYMBOL_GPL(rio_hw_get_inb_message); * mpc85xx_rio_dbell_handler - MPC85xx doorbell interrupt handler * @irq: Linux interrupt number * @dev_instance: Pointer to interrupt-specific data - * @regs: Register context * * Handles doorbell interrupts. Parses a list of registered * doorbell event handlers and executes a matching event handler. */ static irqreturn_t -mpc85xx_rio_dbell_handler(int irq, void *dev_instance, struct pt_regs *regs) +mpc85xx_rio_dbell_handler(int irq, void *dev_instance) { int dsr; struct rio_mport *port = (struct rio_mport *)dev_instance; diff --git a/arch/ppc/syslib/ppc8xx_pic.c b/arch/ppc/syslib/ppc8xx_pic.c index d6c25fe25011..e8619c750732 100644 --- a/arch/ppc/syslib/ppc8xx_pic.c +++ b/arch/ppc/syslib/ppc8xx_pic.c @@ -10,7 +10,7 @@ #include <asm/mpc8xx.h> #include "ppc8xx_pic.h" -extern int cpm_get_irq(struct pt_regs *regs); +extern int cpm_get_irq(void); /* The 8xx internal interrupt controller. It is usually * the only interrupt controller. Some boards, like the MBX and @@ -96,7 +96,7 @@ m8xx_get_irq(struct pt_regs *regs) * get back SIU_LEVEL7. In this case, return -1 */ if (irq == CPM_INTERRUPT) - irq = CPM_IRQ_OFFSET + cpm_get_irq(regs); + irq = CPM_IRQ_OFFSET + cpm_get_irq(); #if defined(CONFIG_PCI) else if (irq == ISA_BRIDGE_INT) { int isa_irq; diff --git a/arch/ppc/syslib/xilinx_pic.c b/arch/ppc/syslib/xilinx_pic.c index 39a93dc6375b..6fd4cdbada72 100644 --- a/arch/ppc/syslib/xilinx_pic.c +++ b/arch/ppc/syslib/xilinx_pic.c @@ -86,7 +86,7 @@ static struct hw_interrupt_type xilinx_intc = { }; int -xilinx_pic_get_irq(struct pt_regs *regs) +xilinx_pic_get_irq(void) { int irq; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 51c2dfe89c62..608193cfe43f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -30,6 +30,9 @@ config GENERIC_CALIBRATE_DELAY bool default y +config GENERIC_TIME + def_bool y + config GENERIC_BUST_SPINLOCK bool diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 2b1e6c9a6e0e..45c9fa7d7545 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -109,7 +109,7 @@ static LIST_HEAD(appldata_ops_list); * * schedule work and reschedule timer */ -static void appldata_timer_function(unsigned long data, struct pt_regs *regs) +static void appldata_timer_function(unsigned long data) { P_DEBUG(" -= Timer =-\n"); P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(), diff --git a/arch/s390/defconfig b/arch/s390/defconfig index b6cad75fd1f4..a3257398ea8d 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -9,6 +9,7 @@ CONFIG_STACKTRACE_SUPPORT=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME=y CONFIG_S390=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index c1b383537fec..4faf96f8a834 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -16,6 +16,7 @@ #include <asm/lowcore.h> #include <asm/s390_ext.h> +#include <asm/irq_regs.h> #include <asm/irq.h> /* @@ -114,7 +115,9 @@ void do_extint(struct pt_regs *regs, unsigned short code) { ext_int_info_t *p; int index; + struct pt_regs *old_regs; + old_regs = set_irq_regs(regs); irq_enter(); asm volatile ("mc 0,0"); if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) @@ -122,18 +125,18 @@ void do_extint(struct pt_regs *regs, unsigned short code) * Make sure that the i/o interrupt did not "overtake" * the last HZ timer interrupt. */ - account_ticks(regs); + account_ticks(); kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; index = ext_hash(code); for (p = ext_int_hash[index]; p; p = p->next) { if (likely(p->code == code)) { if (likely(p->handler)) - p->handler(regs, code); + p->handler(code); } } irq_exit(); + set_irq_regs(old_regs); } EXPORT_SYMBOL(register_external_interrupt); EXPORT_SYMBOL(unregister_external_interrupt); - diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 9f19e833a562..90b5ef529eb7 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -51,4 +51,3 @@ EXPORT_SYMBOL(csum_fold); EXPORT_SYMBOL(console_mode); EXPORT_SYMBOL(console_devno); EXPORT_SYMBOL(console_irq); -EXPORT_SYMBOL(sys_wait4); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a8e6199755d4..62822245f9be 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -339,7 +339,7 @@ void machine_power_off_smp(void) * cpus are handled. */ -void do_ext_call_interrupt(struct pt_regs *regs, __u16 code) +void do_ext_call_interrupt(__u16 code) { unsigned long bits; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index d9428a0fc8fb..0d14a4789bf2 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -62,27 +62,26 @@ static inline unsigned long save_context_stack(struct stack_trace *trace, void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { register unsigned long sp asm ("15"); - unsigned long orig_sp; + unsigned long orig_sp, new_sp; - sp &= PSW_ADDR_INSN; - orig_sp = sp; + orig_sp = sp & PSW_ADDR_INSN; - sp = save_context_stack(trace, &trace->skip, sp, + new_sp = save_context_stack(trace, &trace->skip, orig_sp, S390_lowcore.panic_stack - PAGE_SIZE, S390_lowcore.panic_stack); - if ((sp != orig_sp) && !trace->all_contexts) + if ((new_sp != orig_sp) && !trace->all_contexts) return; - sp = save_context_stack(trace, &trace->skip, sp, + new_sp = save_context_stack(trace, &trace->skip, new_sp, S390_lowcore.async_stack - ASYNC_SIZE, S390_lowcore.async_stack); - if ((sp != orig_sp) && !trace->all_contexts) + if ((new_sp != orig_sp) && !trace->all_contexts) return; if (task) - save_context_stack(trace, &trace->skip, sp, + save_context_stack(trace, &trace->skip, new_sp, (unsigned long) task_stack_page(task), (unsigned long) task_stack_page(task) + THREAD_SIZE); else - save_context_stack(trace, &trace->skip, sp, + save_context_stack(trace, &trace->skip, new_sp, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); return; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4bf66cc4a267..6cceed4df73e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -28,12 +28,14 @@ #include <linux/profile.h> #include <linux/timex.h> #include <linux/notifier.h> +#include <linux/clocksource.h> #include <asm/uaccess.h> #include <asm/delay.h> #include <asm/s390_ext.h> #include <asm/div64.h> #include <asm/irq.h> +#include <asm/irq_regs.h> #include <asm/timer.h> /* change this if you have some constant time drift */ @@ -81,78 +83,10 @@ void tod_to_timeval(__u64 todval, struct timespec *xtime) xtime->tv_nsec = ((todval * 1000) >> 12); } -static inline unsigned long do_gettimeoffset(void) -{ - __u64 now; - - now = (get_clock() - jiffies_timer_cc) >> 12; - now -= (__u64) jiffies * USECS_PER_JIFFY; - return (unsigned long) now; -} - -/* - * This version of gettimeofday has microsecond resolution. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long flags; - unsigned long seq; - unsigned long usec, sec; - - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - - sec = xtime.tv_sec; - usec = xtime.tv_nsec / 1000 + do_gettimeoffset(); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - /* This is revolting. We need to set the xtime.tv_nsec - * correctly. However, the value in this location is - * is value at the last tick. - * Discover what correction gettimeofday - * would have done, and then undo it! - */ - nsec -= do_gettimeoffset() * 1000; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - - #ifdef CONFIG_PROFILING -#define s390_do_profile(regs) profile_tick(CPU_PROFILING, regs) +#define s390_do_profile() profile_tick(CPU_PROFILING) #else -#define s390_do_profile(regs) do { ; } while(0) +#define s390_do_profile() do { ; } while(0) #endif /* CONFIG_PROFILING */ @@ -160,7 +94,7 @@ EXPORT_SYMBOL(do_settimeofday); * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -void account_ticks(struct pt_regs *regs) +void account_ticks(void) { __u64 tmp; __u32 ticks; @@ -221,10 +155,10 @@ void account_ticks(struct pt_regs *regs) account_tick_vtime(current); #else while (ticks--) - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif - s390_do_profile(regs); + s390_do_profile(); } #ifdef CONFIG_NO_IDLE_HZ @@ -285,9 +219,11 @@ static inline void stop_hz_timer(void) */ static inline void start_hz_timer(void) { + BUG_ON(!in_interrupt()); + if (!cpu_isset(smp_processor_id(), nohz_cpu_mask)) return; - account_ticks(task_pt_regs(current)); + account_ticks(); cpu_clear(smp_processor_id(), nohz_cpu_mask); } @@ -337,6 +273,22 @@ void init_cpu_timer(void) extern void vtime_init(void); +static cycle_t read_tod_clock(void) +{ + return get_clock(); +} + +static struct clocksource clocksource_tod = { + .name = "tod", + .rating = 100, + .read = read_tod_clock, + .mask = -1ULL, + .mult = 1000, + .shift = 12, + .is_continuous = 1, +}; + + /* * Initialize the TOD clock and the CPU timer of * the boot cpu. @@ -381,6 +333,9 @@ void __init time_init(void) &ext_int_info_cc) != 0) panic("Couldn't request external interrupt 0x1004"); + if (clocksource_register(&clocksource_tod) != 0) + panic("Could not register TOD clock source"); + init_cpu_timer(); #ifdef CONFIG_NO_IDLE_HZ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 3eb4fab048b8..66375a5e3d12 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -61,7 +61,7 @@ extern pgm_check_handler_t do_dat_exception; #ifdef CONFIG_PFAULT extern int pfault_init(void); extern void pfault_fini(void); -extern void pfault_interrupt(struct pt_regs *regs, __u16 error_code); +extern void pfault_interrupt(__u16 error_code); static ext_int_info_t ext_int_pfault; #endif extern pgm_check_handler_t do_monitor_call; @@ -474,7 +474,7 @@ asmlinkage void illegal_op(struct pt_regs * regs, long interruption_code) signal = math_emu_b3(opcode, regs); } else if (opcode[0] == 0xed) { get_user(*((__u32 *) (opcode+2)), - (__u32 *)(location+1)); + (__u32 __user *)(location+1)); signal = math_emu_ed(opcode, regs); } else if (*((__u16 *) opcode) == 0xb299) { get_user(*((__u16 *) (opcode+2)), location+1); @@ -499,7 +499,7 @@ asmlinkage void illegal_op(struct pt_regs * regs, long interruption_code) info.si_signo = signal; info.si_errno = 0; info.si_code = SEGV_MAPERR; - info.si_addr = (void *) location; + info.si_addr = (void __user *) location; do_trap(interruption_code, signal, "user address fault", regs, &info); } else @@ -520,10 +520,10 @@ asmlinkage void specification_exception(struct pt_regs * regs, long interruption_code) { __u8 opcode[6]; - __u16 *location = NULL; + __u16 __user *location = NULL; int signal = 0; - location = (__u16 *) get_check_address(regs); + location = (__u16 __user *) get_check_address(regs); /* * We got all needed information from the lowcore and can @@ -632,7 +632,7 @@ asmlinkage void data_exception(struct pt_regs * regs, long interruption_code) break; case 0xed: get_user(*((__u32 *) (opcode+2)), - (__u32 *)(location+1)); + (__u32 __user *)(location+1)); signal = math_emu_ed(opcode, regs); break; case 0xb2: diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 2306cd83fca1..21baaf5496d6 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -22,6 +22,7 @@ #include <asm/s390_ext.h> #include <asm/timer.h> +#include <asm/irq_regs.h> static ext_int_info_t ext_int_info_timer; DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); @@ -208,11 +209,11 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) * Do the callback functions of expired vtimer events. * Called from within the interrupt handler. */ -static void do_callbacks(struct list_head *cb_list, struct pt_regs *regs) +static void do_callbacks(struct list_head *cb_list) { struct vtimer_queue *vt_list; struct vtimer_list *event, *tmp; - void (*fn)(unsigned long, struct pt_regs*); + void (*fn)(unsigned long); unsigned long data; if (list_empty(cb_list)) @@ -223,7 +224,7 @@ static void do_callbacks(struct list_head *cb_list, struct pt_regs *regs) list_for_each_entry_safe(event, tmp, cb_list, entry) { fn = event->function; data = event->data; - fn(data, regs); + fn(data); if (!event->interval) /* delete one shot timer */ @@ -241,7 +242,7 @@ static void do_callbacks(struct list_head *cb_list, struct pt_regs *regs) /* * Handler for the virtual CPU timer. */ -static void do_cpu_timer_interrupt(struct pt_regs *regs, __u16 error_code) +static void do_cpu_timer_interrupt(__u16 error_code) { int cpu; __u64 next, delta; @@ -274,7 +275,7 @@ static void do_cpu_timer_interrupt(struct pt_regs *regs, __u16 error_code) list_move_tail(&event->entry, &cb_list); } spin_unlock(&vt_list->lock); - do_callbacks(&cb_list, regs); + do_callbacks(&cb_list); /* next event is first in list */ spin_lock(&vt_list->lock); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9c3c19fe62fc..1c323bbfda91 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -451,7 +451,7 @@ void pfault_fini(void) } asmlinkage void -pfault_interrupt(struct pt_regs *regs, __u16 error_code) +pfault_interrupt(__u16 error_code) { struct task_struct *tsk; __u16 subcode; diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c index 72f0201051a0..b3b6680a2a30 100644 --- a/arch/sparc/kernel/irq.c +++ b/arch/sparc/kernel/irq.c @@ -46,6 +46,7 @@ #include <asm/pgtable.h> #include <asm/pcic.h> #include <asm/cacheflush.h> +#include <asm/irq_regs.h> #ifdef CONFIG_SMP #define SMP_NOP2 "nop; nop;\n\t" @@ -133,8 +134,8 @@ static void irq_panic(void) prom_halt(); } -void (*sparc_init_timers)(irqreturn_t (*)(int, void *,struct pt_regs *)) = - (void (*)(irqreturn_t (*)(int, void *,struct pt_regs *))) irq_panic; +void (*sparc_init_timers)(irq_handler_t ) = + (void (*)(irq_handler_t )) irq_panic; /* * Dave Redman (djhr@tadpole.co.uk) @@ -319,12 +320,14 @@ void unexpected_irq(int irq, void *dev_id, struct pt_regs * regs) void handler_irq(int irq, struct pt_regs * regs) { + struct pt_regs *old_regs; struct irqaction * action; int cpu = smp_processor_id(); #ifdef CONFIG_SMP extern void smp4m_irq_rotate(int cpu); #endif + old_regs = set_irq_regs(regs); irq_enter(); disable_pil_irq(irq); #ifdef CONFIG_SMP @@ -338,27 +341,31 @@ void handler_irq(int irq, struct pt_regs * regs) do { if (!action || !action->handler) unexpected_irq(irq, NULL, regs); - action->handler(irq, action->dev_id, regs); + action->handler(irq, action->dev_id); action = action->next; } while (action); sparc_irq[irq].flags &= ~SPARC_IRQ_INPROGRESS; enable_pil_irq(irq); irq_exit(); + set_irq_regs(old_regs); } #ifdef CONFIG_BLK_DEV_FD -extern void floppy_interrupt(int irq, void *dev_id, struct pt_regs *regs); +extern void floppy_interrupt(int irq, void *dev_id) void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) { + struct pt_regs *old_regs; int cpu = smp_processor_id(); + old_regs = set_irq_regs(regs); disable_pil_irq(irq); irq_enter(); kstat_cpu(cpu).irqs[irq]++; - floppy_interrupt(irq, dev_id, regs); + floppy_interrupt(irq, dev_id); irq_exit(); enable_pil_irq(irq); + set_irq_regs(old_regs); // XXX Eek, it's totally changed with preempt_count() and such // if (softirq_pending(cpu)) // do_softirq(); @@ -369,7 +376,7 @@ void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) * thus no sharing possible. */ int request_fast_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), + irq_handler_t handler, unsigned long irqflags, const char *devname) { struct irqaction *action; @@ -468,7 +475,7 @@ out: } int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), + irq_handler_t handler, unsigned long irqflags, const char * devname, void *dev_id) { struct irqaction * action, **actionp; @@ -478,7 +485,7 @@ int request_irq(unsigned int irq, if (sparc_cpu_model == sun4d) { extern int sun4d_request_irq(unsigned int, - irqreturn_t (*)(int, void *, struct pt_regs *), + irq_handler_t , unsigned long, const char *, void *); return sun4d_request_irq(irq, handler, irqflags, devname, dev_id); } diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index edb6cc665f56..207f1b6eef53 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -34,6 +34,7 @@ #include <asm/pcic.h> #include <asm/timer.h> #include <asm/uaccess.h> +#include <asm/irq_regs.h> unsigned int pcic_pin_to_irq(unsigned int pin, char *name); @@ -708,13 +709,13 @@ static void pcic_clear_clock_irq(void) pcic_timer_dummy = readl(pcic0.pcic_regs+PCI_SYS_LIMIT); } -static irqreturn_t pcic_timer_handler (int irq, void *h, struct pt_regs *regs) +static irqreturn_t pcic_timer_handler (int irq, void *h) { write_seqlock(&xtime_lock); /* Dummy, to show that we remember */ pcic_clear_clock_irq(); do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif write_sequnlock(&xtime_lock); return IRQ_HANDLED; diff --git a/arch/sparc/kernel/prom.c b/arch/sparc/kernel/prom.c index 4ca9e5fc97f4..2cc302b6bec0 100644 --- a/arch/sparc/kernel/prom.c +++ b/arch/sparc/kernel/prom.c @@ -243,7 +243,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len void *old_val = prop->value; int ret; - ret = prom_setprop(dp->node, name, val, len); + ret = prom_setprop(dp->node, (char *) name, val, len); err = -EINVAL; if (ret >= 0) { prop->value = new_val; @@ -477,7 +477,10 @@ static struct property * __init build_one_prop(phandle node, char *prev, char *s p->length = 0; } else { p->value = prom_early_alloc(p->length + 1); - prom_getproperty(node, p->name, p->value, p->length); + len = prom_getproperty(node, p->name, p->value, + p->length); + if (len <= 0) + p->length = 0; ((unsigned char *)p->value)[p->length] = '\0'; } } diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c index 0251cab4708b..f5ee1ac834bc 100644 --- a/arch/sparc/kernel/setup.c +++ b/arch/sparc/kernel/setup.c @@ -121,16 +121,6 @@ static struct console prom_debug_console = { .index = -1, }; -int obp_system_intr(void) -{ - if (boot_flags & BOOTME_DEBUG) { - printk("OBP: system interrupted\n"); - prom_halt(); - return 1; - } - return 0; -} - /* * Process kernel command line switches that are specific to the * SPARC or that require special low-level processing. diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c index 4d441a554d35..33dadd9f2871 100644 --- a/arch/sparc/kernel/sparc_ksyms.c +++ b/arch/sparc/kernel/sparc_ksyms.c @@ -87,6 +87,7 @@ extern void ___set_bit(void); extern void ___clear_bit(void); extern void ___change_bit(void); extern void ___rw_read_enter(void); +extern void ___rw_read_try(void); extern void ___rw_read_exit(void); extern void ___rw_write_enter(void); @@ -104,8 +105,9 @@ extern unsigned _Urem(unsigned, unsigned); EXPORT_SYMBOL(sparc_cpu_model); EXPORT_SYMBOL(kernel_thread); #ifdef CONFIG_SMP -// XXX find what uses (or used) these. +// XXX find what uses (or used) these. AV: see asm/spinlock.h EXPORT_SYMBOL(___rw_read_enter); +EXPORT_SYMBOL(___rw_read_try); EXPORT_SYMBOL(___rw_read_exit); EXPORT_SYMBOL(___rw_write_enter); #endif diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c index 4be2c86ea540..009e891a4329 100644 --- a/arch/sparc/kernel/sun4c_irq.c +++ b/arch/sparc/kernel/sun4c_irq.c @@ -154,7 +154,7 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit) /* Errm.. not sure how to do this.. */ } -static void __init sun4c_init_timers(irqreturn_t (*counter_fn)(int, void *, struct pt_regs *)) +static void __init sun4c_init_timers(irq_handler_t counter_fn) { int irq; diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c index 74eed9775ac0..d4f9da8170c5 100644 --- a/arch/sparc/kernel/sun4d_irq.c +++ b/arch/sparc/kernel/sun4d_irq.c @@ -38,6 +38,7 @@ #include <asm/sbus.h> #include <asm/sbi.h> #include <asm/cacheflush.h> +#include <asm/irq_regs.h> /* If you trust current SCSI layer to handle different SCSI IRQs, enable this. I don't trust it... -jj */ /* #define DISTRIBUTE_IRQS */ @@ -198,6 +199,7 @@ extern void unexpected_irq(int, void *, struct pt_regs *); void sun4d_handler_irq(int irq, struct pt_regs * regs) { + struct pt_regs *old_regs; struct irqaction * action; int cpu = smp_processor_id(); /* SBUS IRQ level (1 - 7) */ @@ -208,6 +210,7 @@ void sun4d_handler_irq(int irq, struct pt_regs * regs) cc_set_iclr(1 << irq); + old_regs = set_irq_regs(regs); irq_enter(); kstat_cpu(cpu).irqs[irq]++; if (!sbusl) { @@ -215,7 +218,7 @@ void sun4d_handler_irq(int irq, struct pt_regs * regs) if (!action) unexpected_irq(irq, NULL, regs); do { - action->handler(irq, action->dev_id, regs); + action->handler(irq, action->dev_id); action = action->next; } while (action); } else { @@ -242,7 +245,7 @@ void sun4d_handler_irq(int irq, struct pt_regs * regs) if (!action) unexpected_irq(irq, NULL, regs); do { - action->handler(irq, action->dev_id, regs); + action->handler(irq, action->dev_id); action = action->next; } while (action); release_sbi(SBI2DEVID(sbino), slot); @@ -250,6 +253,7 @@ void sun4d_handler_irq(int irq, struct pt_regs * regs) } } irq_exit(); + set_irq_regs(old_regs); } unsigned int sun4d_build_irq(struct sbus_dev *sdev, int irq) @@ -272,7 +276,7 @@ unsigned int sun4d_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint) } int sun4d_request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), + irq_handler_t handler, unsigned long irqflags, const char * devname, void *dev_id) { struct irqaction *action, *tmp = NULL, **actionp; @@ -466,7 +470,7 @@ static void sun4d_load_profile_irq(int cpu, unsigned int limit) bw_set_prof_limit(cpu, limit); } -static void __init sun4d_init_timers(irqreturn_t (*counter_fn)(int, void *, struct pt_regs *)) +static void __init sun4d_init_timers(irq_handler_t counter_fn) { int irq; int cpu; diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 3ff4edd32815..c80ea61e8ba0 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -23,6 +23,7 @@ #include <asm/ptrace.h> #include <asm/atomic.h> +#include <asm/irq_regs.h> #include <asm/delay.h> #include <asm/irq.h> @@ -369,10 +370,12 @@ void smp4d_message_pass(int target, int msg, unsigned long data, int wait) void smp4d_percpu_timer_interrupt(struct pt_regs *regs) { + struct pt_regs *old_regs; int cpu = hard_smp4d_processor_id(); static int cpu_tick[NR_CPUS]; static char led_mask[] = { 0xe, 0xd, 0xb, 0x7, 0xb, 0xd }; + old_regs = set_irq_regs(regs); bw_get_prof_limit(cpu); bw_clear_intr_mask(0, 1); /* INTR_TABLE[0] & 1 is Profile IRQ */ @@ -384,7 +387,7 @@ void smp4d_percpu_timer_interrupt(struct pt_regs *regs) show_leds(cpu); } - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); if(!--prof_counter(cpu)) { int user = user_mode(regs); @@ -395,6 +398,7 @@ void smp4d_percpu_timer_interrupt(struct pt_regs *regs) prof_counter(cpu) = prof_multiplier(cpu); } + set_irq_regs(old_regs); } extern unsigned int lvl14_resolution; diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index 7cefa301efea..a654c16f4027 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -228,7 +228,7 @@ static void sun4m_load_profile_irq(int cpu, unsigned int limit) sun4m_timers->cpu_timers[cpu].l14_timer_limit = limit; } -static void __init sun4m_init_timers(irqreturn_t (*counter_fn)(int, void *, struct pt_regs *)) +static void __init sun4m_init_timers(irq_handler_t counter_fn) { int reg_count, irq, cpu; struct linux_prom_registers cnt_regs[PROMREG_MAX]; diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 7d4a649138f6..e2d9c018bd56 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -19,6 +19,7 @@ #include <linux/profile.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> +#include <asm/irq_regs.h> #include <asm/ptrace.h> #include <asm/atomic.h> @@ -353,11 +354,14 @@ void smp4m_cross_call_irq(void) void smp4m_percpu_timer_interrupt(struct pt_regs *regs) { + struct pt_regs *old_regs; int cpu = smp_processor_id(); + old_regs = set_irq_regs(regs); + clear_profile_irq(cpu); - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); if(!--prof_counter(cpu)) { int user = user_mode(regs); @@ -368,6 +372,7 @@ void smp4m_percpu_timer_interrupt(struct pt_regs *regs) prof_counter(cpu) = prof_multiplier(cpu); } + set_irq_regs(old_regs); } extern unsigned int lvl14_resolution; diff --git a/arch/sparc/kernel/tick14.c b/arch/sparc/kernel/tick14.c index d3b4daac705f..f1a7bd19e04f 100644 --- a/arch/sparc/kernel/tick14.c +++ b/arch/sparc/kernel/tick14.c @@ -55,7 +55,7 @@ void install_obp_ticker(void) linux_lvl14[3] = obp_lvl14[3]; } -void claim_ticker14(irqreturn_t (*handler)(int, void *, struct pt_regs *), +void claim_ticker14(irq_handler_t handler, int irq_nr, unsigned int timeout ) { int cpu = smp_processor_id(); diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index e10dc831944d..7dcd1a16c6e4 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c @@ -42,6 +42,7 @@ #include <asm/page.h> #include <asm/pcic.h> #include <asm/of_device.h> +#include <asm/irq_regs.h> DEFINE_SPINLOCK(rtc_lock); enum sparc_clock_type sp_clock_typ; @@ -104,13 +105,13 @@ __volatile__ unsigned int *master_l10_limit; #define TICK_SIZE (tick_nsec / 1000) -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) +irqreturn_t timer_interrupt(int irq, void *dev_id) { /* last time the cmos clock got updated */ static long last_rtc_update; #ifndef CONFIG_SMP - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); #endif /* Protect counter clear so that do_gettimeoffset works */ @@ -128,7 +129,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 346c19a949fd..1dd78c84888a 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -36,11 +36,11 @@ SECTIONS . = ALIGN(4096); __init_begin = .; + _sinittext = .; .init.text : { - _sinittext = .; *(.init.text) - _einittext = .; } + _einittext = .; __init_text_end = .; .init.data : { *(.init.data) } . = ALIGN(16); diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S index 95fa48424967..b1df55cb2215 100644 --- a/arch/sparc/lib/locks.S +++ b/arch/sparc/lib/locks.S @@ -25,6 +25,15 @@ ___rw_read_enter_spin_on_wlock: ldstub [%g1 + 3], %g2 b ___rw_read_enter_spin_on_wlock ldub [%g1 + 3], %g2 +___rw_read_try_spin_on_wlock: + andcc %g2, 0xff, %g0 + be,a ___rw_read_try + ldstub [%g1 + 3], %g2 + xnorcc %g2, 0x0, %o0 /* if g2 is ~0, set o0 to 0 and bugger off */ + bne,a ___rw_read_enter_spin_on_wlock + ld [%g1], %g2 + retl + mov %g4, %o7 ___rw_read_exit_spin_on_wlock: orcc %g2, 0x0, %g0 be,a ___rw_read_exit @@ -60,6 +69,17 @@ ___rw_read_exit: retl mov %g4, %o7 + .globl ___rw_read_try +___rw_read_try: + orcc %g2, 0x0, %g0 + bne ___rw_read_try_spin_on_wlock + ld [%g1], %g2 + add %g2, 1, %g2 + st %g2, [%g1] + set 1, %o1 + retl + mov %g4, %o7 + .globl ___rw_write_enter ___rw_write_enter: orcc %g2, 0x0, %g0 diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index b27a506309ee..0df7121cef07 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -402,7 +402,7 @@ void srmmu_nocache_calcsize(void) srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size; } -void srmmu_nocache_init(void) +void __init srmmu_nocache_init(void) { unsigned int bitmap_bits; pgd_t *pgd; diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index f54ab375464b..dcae559879ae 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18 -# Mon Oct 2 14:24:40 2006 +# Linux kernel version: 2.6.19-rc1 +# Thu Oct 5 02:08:41 2006 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -197,6 +197,7 @@ CONFIG_INET_XFRM_TUNNEL=y CONFIG_INET_TUNNEL=y CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set @@ -214,6 +215,7 @@ CONFIG_INET6_XFRM_TUNNEL=m CONFIG_INET6_TUNNEL=m CONFIG_INET6_XFRM_MODE_TRANSPORT=m CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_TUNNEL=m # CONFIG_IPV6_SUBTREES is not set @@ -373,6 +375,7 @@ CONFIG_BLK_DEV_ALI15X3=y # CONFIG_BLK_DEV_CS5530 is not set # CONFIG_BLK_DEV_HPT34X is not set # CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set # CONFIG_BLK_DEV_SC1200 is not set # CONFIG_BLK_DEV_PIIX is not set # CONFIG_BLK_DEV_IT821X is not set @@ -449,10 +452,10 @@ CONFIG_ISCSI_TCP=m # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_STEX is not set # CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set # CONFIG_SCSI_QLOGIC_1280 is not set # CONFIG_SCSI_QLOGICPTI is not set # CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set # CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set @@ -478,6 +481,7 @@ CONFIG_MD_RAID456=m CONFIG_MD_MULTIPATH=m # CONFIG_MD_FAULTY is not set CONFIG_BLK_DEV_DM=m +# CONFIG_DM_DEBUG is not set CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m @@ -840,12 +844,12 @@ CONFIG_HWMON=y # # Misc devices # +# CONFIG_TIFM_CORE is not set # # Multimedia devices # # CONFIG_VIDEO_DEV is not set -CONFIG_VIDEO_V4L2=y # # Digital Video Broadcasting Devices @@ -858,6 +862,7 @@ CONFIG_VIDEO_V4L2=y # # CONFIG_FIRMWARE_EDID is not set CONFIG_FB=y +CONFIG_FB_DDC=y CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y @@ -1236,6 +1241,7 @@ CONFIG_FS_MBCACHE=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -1279,6 +1285,7 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -1355,6 +1362,10 @@ CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_UTF8 is not set # +# Distributed Lock Manager +# + +# # Instrumentation Support # CONFIG_PROFILING=y diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c index 8a9b470e1b65..2df25c2b4071 100644 --- a/arch/sparc64/kernel/ebus.c +++ b/arch/sparc64/kernel/ebus.c @@ -79,7 +79,7 @@ static void __ebus_dma_reset(struct ebus_dma_info *p, int no_drain) } } -static irqreturn_t ebus_dma_irq(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t ebus_dma_irq(int irq, void *dev_id) { struct ebus_dma_info *p = dev_id; unsigned long flags; diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 4e64724cb9ae..d64b1ea848de 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -522,12 +522,13 @@ void ack_bad_irq(unsigned int virt_irq) } #ifndef CONFIG_SMP -extern irqreturn_t timer_interrupt(int, void *, struct pt_regs *); +extern irqreturn_t timer_interrupt(int, void *); void timer_irq(int irq, struct pt_regs *regs) { unsigned long clr_mask = 1 << irq; unsigned long tick_mask = tick_ops->softint_mask; + struct pt_regs *old_regs; if (get_softint() & tick_mask) { irq = 0; @@ -535,21 +536,25 @@ void timer_irq(int irq, struct pt_regs *regs) } clear_softint(clr_mask); + old_regs = set_irq_regs(regs); irq_enter(); kstat_this_cpu.irqs[0]++; - timer_interrupt(irq, NULL, regs); + timer_interrupt(irq, NULL); irq_exit(); + set_irq_regs(old_regs); } #endif void handler_irq(int irq, struct pt_regs *regs) { struct ino_bucket *bucket; + struct pt_regs *old_regs; clear_softint(1 << irq); + old_regs = set_irq_regs(regs); irq_enter(); /* Sliiiick... */ @@ -558,12 +563,13 @@ void handler_irq(int irq, struct pt_regs *regs) struct ino_bucket *next = __bucket(bucket->irq_chain); bucket->irq_chain = 0; - __do_IRQ(bucket->virt_irq, regs); + __do_IRQ(bucket->virt_irq); bucket = next; } irq_exit(); + set_irq_regs(old_regs); } struct sun5_timer { diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index 7f9204535770..d822c7c18e1f 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -841,7 +841,7 @@ static struct of_device * __init scan_one_device(struct device_node *dp, if (!parent) strcpy(op->dev.bus_id, "root"); else - strcpy(op->dev.bus_id, dp->path_component_name); + sprintf(op->dev.bus_id, "%s@%08x", dp->name, dp->node); if (of_device_register(op)) { printk("%s: Could not register of device.\n", diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 1ec0aab68c08..fda5db223d96 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -533,7 +533,7 @@ static void psycho_check_iommu_error(struct pci_controller_info *p, #define PSYCHO_UEAFSR_RESV2 0x00000000007fffffUL /* Reserved */ #define PSYCHO_UE_AFAR 0x0038UL -static irqreturn_t psycho_ue_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t psycho_ue_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; unsigned long afsr_reg = p->pbm_A.controller_regs + PSYCHO_UE_AFSR; @@ -610,7 +610,7 @@ static irqreturn_t psycho_ue_intr(int irq, void *dev_id, struct pt_regs *regs) #define PSYCHO_CEAFSR_RESV2 0x00000000007fffffUL /* Reserved */ #define PSYCHO_CE_AFAR 0x0040UL -static irqreturn_t psycho_ce_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t psycho_ce_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; unsigned long afsr_reg = p->pbm_A.controller_regs + PSYCHO_CE_AFSR; @@ -735,7 +735,7 @@ static irqreturn_t psycho_pcierr_intr_other(struct pci_pbm_info *pbm, int is_pbm return ret; } -static irqreturn_t psycho_pcierr_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t psycho_pcierr_intr(int irq, void *dev_id) { struct pci_pbm_info *pbm = dev_id; struct pci_controller_info *p = pbm->parent; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 45891850b90d..6ec569828c29 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -574,7 +574,7 @@ static void sabre_check_iommu_error(struct pci_controller_info *p, spin_unlock_irqrestore(&iommu->lock, flags); } -static irqreturn_t sabre_ue_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sabre_ue_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; unsigned long afsr_reg = p->pbm_A.controller_regs + SABRE_UE_AFSR; @@ -634,7 +634,7 @@ static irqreturn_t sabre_ue_intr(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -static irqreturn_t sabre_ce_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sabre_ce_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; unsigned long afsr_reg = p->pbm_A.controller_regs + SABRE_CE_AFSR; @@ -726,7 +726,7 @@ static irqreturn_t sabre_pcierr_intr_other(struct pci_controller_info *p) return ret; } -static irqreturn_t sabre_pcierr_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sabre_pcierr_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; unsigned long afsr_reg, afar_reg; diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index 75ade83ecc65..66911b126aed 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -515,7 +515,7 @@ static void schizo_check_iommu_error(struct pci_controller_info *p, #define SCHIZO_UEAFSR_MTAG 0x000000000000e000UL /* Safari */ #define SCHIZO_UEAFSR_ECCSYND 0x00000000000001ffUL /* Safari */ -static irqreturn_t schizo_ue_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t schizo_ue_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; unsigned long afsr_reg = p->pbm_B.controller_regs + SCHIZO_UE_AFSR; @@ -603,7 +603,7 @@ static irqreturn_t schizo_ue_intr(int irq, void *dev_id, struct pt_regs *regs) #define SCHIZO_CEAFSR_MTAG 0x000000000000e000UL #define SCHIZO_CEAFSR_ECCSYND 0x00000000000001ffUL -static irqreturn_t schizo_ce_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t schizo_ce_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; unsigned long afsr_reg = p->pbm_B.controller_regs + SCHIZO_CE_AFSR; @@ -778,7 +778,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) return ret; } -static irqreturn_t schizo_pcierr_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t schizo_pcierr_intr(int irq, void *dev_id) { struct pci_pbm_info *pbm = dev_id; struct pci_controller_info *p = pbm->parent; @@ -933,7 +933,7 @@ static irqreturn_t schizo_pcierr_intr(int irq, void *dev_id, struct pt_regs *reg /* We only expect UNMAP errors here. The rest of the Safari errors * are marked fatal and thus cause a system reset. */ -static irqreturn_t schizo_safarierr_intr(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t schizo_safarierr_intr(int irq, void *dev_id) { struct pci_controller_info *p = dev_id; u64 errlog; diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c index 0b9c70627ce4..699b24b890df 100644 --- a/arch/sparc64/kernel/power.c +++ b/arch/sparc64/kernel/power.c @@ -35,7 +35,7 @@ static void __iomem *power_reg; static DECLARE_WAIT_QUEUE_HEAD(powerd_wait); static int button_pressed; -static irqreturn_t power_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t power_handler(int irq, void *dev_id) { if (button_pressed == 0) { button_pressed = 1; diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index c49a57795743..01d6d869ea2b 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -839,7 +839,7 @@ unsigned int sbus_build_irq(void *buscookie, unsigned int ino) #define SYSIO_UEAFSR_SIZE 0x00001c0000000000UL /* Bad transfer size 2^SIZE */ #define SYSIO_UEAFSR_MID 0x000003e000000000UL /* UPA MID causing the fault */ #define SYSIO_UEAFSR_RESV2 0x0000001fffffffffUL /* Reserved */ -static irqreturn_t sysio_ue_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sysio_ue_handler(int irq, void *dev_id) { struct sbus_bus *sbus = dev_id; struct sbus_iommu *iommu = sbus->iommu; @@ -911,7 +911,7 @@ static irqreturn_t sysio_ue_handler(int irq, void *dev_id, struct pt_regs *regs) #define SYSIO_CEAFSR_SIZE 0x00001c0000000000UL /* Bad transfer size 2^SIZE */ #define SYSIO_CEAFSR_MID 0x000003e000000000UL /* UPA MID causing the fault */ #define SYSIO_CEAFSR_RESV2 0x0000001fffffffffUL /* Reserved */ -static irqreturn_t sysio_ce_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sysio_ce_handler(int irq, void *dev_id) { struct sbus_bus *sbus = dev_id; struct sbus_iommu *iommu = sbus->iommu; @@ -988,7 +988,7 @@ static irqreturn_t sysio_ce_handler(int irq, void *dev_id, struct pt_regs *regs) #define SYSIO_SBAFSR_SIZE 0x00001c0000000000UL /* Size of transfer */ #define SYSIO_SBAFSR_MID 0x000003e000000000UL /* MID causing the error */ #define SYSIO_SBAFSR_RESV3 0x0000001fffffffffUL /* Reserved */ -static irqreturn_t sysio_sbus_error_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sysio_sbus_error_handler(int irq, void *dev_id) { struct sbus_bus *sbus = dev_id; struct sbus_iommu *iommu = sbus->iommu; diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 958287448cfe..cc8ad480a204 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -91,16 +91,6 @@ void kernel_enter_debugger(void) { } -int obp_system_intr(void) -{ - if (boot_flags & BOOTME_DEBUG) { - printk("OBP: system interrupted\n"); - prom_halt(); - return 1; - } - return 0; -} - /* * Process kernel command line switches that are specific to the * SPARC or that require special low-level processing. diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index f62bf3a2de1a..cc09d8266414 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -31,6 +31,7 @@ #include <asm/cpudata.h> #include <asm/irq.h> +#include <asm/irq_regs.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/oplib.h> @@ -1187,6 +1188,7 @@ void smp_percpu_timer_interrupt(struct pt_regs *regs) unsigned long compare, tick, pstate; int cpu = smp_processor_id(); int user = user_mode(regs); + struct pt_regs *old_regs; /* * Check for level 14 softint. @@ -1203,8 +1205,9 @@ void smp_percpu_timer_interrupt(struct pt_regs *regs) clear_softint(tick_mask); } + old_regs = set_irq_regs(regs); do { - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); if (!--prof_counter(cpu)) { irq_enter(); @@ -1236,6 +1239,7 @@ void smp_percpu_timer_interrupt(struct pt_regs *regs) : /* no outputs */ : "r" (pstate)); } while (time_after_eq(tick, compare)); + set_irq_regs(old_regs); } static void __init smp_setup_percpu_timer(void) diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index 00f6fc4aaaff..061e1b1fa583 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -45,6 +45,7 @@ #include <asm/cpudata.h> #include <asm/uaccess.h> #include <asm/prom.h> +#include <asm/irq_regs.h> DEFINE_SPINLOCK(mostek_lock); DEFINE_SPINLOCK(rtc_lock); @@ -452,7 +453,7 @@ static inline void timer_check_rtc(void) } } -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) +irqreturn_t timer_interrupt(int irq, void *dev_id) { unsigned long ticks, compare, pstate; @@ -460,8 +461,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) do { #ifndef CONFIG_SMP - profile_tick(CPU_PROFILING, regs); - update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING); + update_process_times(user_mode(get_irq_regs())); #endif do_timer(1); diff --git a/arch/um/Kconfig b/arch/um/Kconfig index d75307589d74..78fb619bdb73 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -25,6 +25,19 @@ config PCI config PCMCIA bool +# Yet to do! +config TRACE_IRQFLAGS_SUPPORT + bool + default n + +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + config GENERIC_CALIBRATE_DELAY bool default y @@ -37,13 +50,15 @@ config IRQ_RELEASE_METHOD menu "UML-specific options" config MODE_TT - bool "Tracing thread support" + bool "Tracing thread support (DEPRECATED)" default n help This option controls whether tracing thread support is compiled - into UML. This option is largely obsolete, given that skas0 provides + into UML. This option is largely obsolete, given that skas0 provides skas security and performance without needing to patch the host. - It is safe to say 'N' here. + It is safe to say 'N' here; saying 'Y' may cause additional problems + with the resulting binary even if you run UML in SKAS mode, and running + in TT mode is strongly *NOT RECOMMENDED*. config STATIC_LINK bool "Force a static link" @@ -56,6 +71,9 @@ config STATIC_LINK for use in a chroot jail. So, if you intend to run UML inside a chroot, and you disable CONFIG_MODE_TT, you probably want to say Y here. + Additionally, this option enables using higher memory spaces (up to + 2.75G) for UML - disabling CONFIG_MODE_TT and enabling this option leads + to best results for this. config KERNEL_HALF_GIGS int "Kernel address space size (in .5G units)" @@ -72,10 +90,13 @@ config MODE_SKAS default y help This option controls whether skas (separate kernel address space) - support is compiled in. If you have applied the skas patch to the - host, then you certainly want to say Y here (and consider saying N - to CONFIG_MODE_TT). Otherwise, it is safe to say Y. Disabling this - option will shrink the UML binary slightly. + support is compiled in. + Unless you have specific needs to use TT mode (which applies almost only + to developers), you should say Y here. + SKAS mode will make use of the SKAS3 patch if it is applied on the host + (and your UML will run in SKAS3 mode), but if no SKAS patch is applied + on the host it will run in SKAS0 mode, which is anyway faster than TT + mode. source "arch/um/Kconfig.arch" source "mm/Kconfig" diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386 index f6eb72d117b9..f191a550a079 100644 --- a/arch/um/Kconfig.i386 +++ b/arch/um/Kconfig.i386 @@ -16,23 +16,42 @@ config SEMAPHORE_SLEEPERS bool default y -config HOST_2G_2G - bool "2G/2G host address space split" - default n - help - This is needed when the host on which you run has a 2G/2G memory - split, instead of the customary 3G/1G. - - Note that to enable such a host - configuration, which makes sense only in some cases, you need special - host patches. - - So, if you do not know what to do here, say 'N'. +choice + prompt "Host memory split" + default HOST_VMSPLIT_3G + ---help--- + This is needed when the host kernel on which you run has a non-default + (like 2G/2G) memory split, instead of the customary 3G/1G. If you did + not recompile your own kernel but use the default distro's one, you can + safely accept the "Default split" option. + + It can be enabled on recent (>=2.6.16-rc2) vanilla kernels via + CONFIG_VM_SPLIT_*, or on previous kernels with special patches (-ck + patchset by Con Kolivas, or other ones) - option names match closely the + host CONFIG_VM_SPLIT_* ones. + + A lower setting (where 1G/3G is lowest and 3G/1G is higher) will + tolerate even more "normal" host kernels, but an higher setting will be + stricter. + + So, if you do not know what to do here, say 'Default split'. + + config HOST_VMSPLIT_3G + bool "Default split (3G/1G user/kernel host split)" + config HOST_VMSPLIT_3G_OPT + bool "3G/1G user/kernel host split (for full 1G low memory)" + config HOST_VMSPLIT_2G + bool "2G/2G user/kernel host split" + config HOST_VMSPLIT_1G + bool "1G/3G user/kernel host split" +endchoice config TOP_ADDR - hex - default 0xc0000000 if !HOST_2G_2G - default 0x80000000 if HOST_2G_2G + hex + default 0xB0000000 if HOST_VMSPLIT_3G_OPT + default 0x78000000 if HOST_VMSPLIT_2G + default 0x40000000 if HOST_VMSPLIT_1G + default 0xC0000000 config 3_LEVEL_PGTABLES bool "Three-level pagetables (EXPERIMENTAL)" diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 index 11154b6773ec..d278682dd799 100644 --- a/arch/um/Makefile-x86_64 +++ b/arch/um/Makefile-x86_64 @@ -1,10 +1,10 @@ # Copyright 2003 - 2004 Pathscale, Inc # Released under the GPL -core-y += arch/um/sys-x86_64/ +core-y += arch/um/sys-x86_64/ arch/x86_64/crypto/ START := 0x60000000 -_extra_flags_ = -fno-builtin -m64 -mcmodel=kernel +_extra_flags_ = -fno-builtin -m64 #We #undef __x86_64__ for kernelspace, not for userspace where #it's needed for headers to work! diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index cfd9f01fd464..426633e5d6e3 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -20,7 +20,7 @@ #define LINE_BUFSIZE 4096 -static irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused) +static irqreturn_t line_interrupt(int irq, void *data) { struct chan *chan = data; struct line *line = chan->line; @@ -364,8 +364,7 @@ void line_unthrottle(struct tty_struct *tty) reactivate_chan(&line->chan_list, line->driver->read_irq); } -static irqreturn_t line_write_interrupt(int irq, void *data, - struct pt_regs *unused) +static irqreturn_t line_write_interrupt(int irq, void *data) { struct chan *chan = data; struct line *line = chan->line; @@ -712,7 +711,7 @@ struct winch { struct tty_struct *tty; }; -static irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) +static irqreturn_t winch_interrupt(int irq, void *data) { struct winch *winch = data; struct tty_struct *tty; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index a67dcbd78de4..d08bd036ccb8 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -74,8 +74,7 @@ static void mc_work_proc(void *unused) static DECLARE_WORK(mconsole_work, mc_work_proc, NULL); -static irqreturn_t mconsole_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t mconsole_interrupt(int irq, void *dev_id) { /* long to avoid size mismatch warnings from gcc */ long fd; @@ -674,8 +673,9 @@ static void with_console(struct mc_request *req, void (*proc)(void *), static void sysrq_proc(void *arg) { char *op = arg; - - handle_sysrq(*op, ¤t->thread.regs, NULL); + struct pt_regs *old_regs = set_irq_regs(¤t->thread.regs); + handle_sysrq(*op, NULL); + set_irq_regs(old_regs); } void mconsole_sysrq(struct mc_request *req) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index c1c5604752fb..ec9eb8bd9432 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -77,7 +77,7 @@ static void uml_dev_close(void* dev) dev_close( (struct net_device *) dev); } -irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t uml_net_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; struct uml_net_private *lp = dev->priv; diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h index 58f9f6a1420f..96b80b565eeb 100644 --- a/arch/um/drivers/pcap_user.h +++ b/arch/um/drivers/pcap_user.h @@ -15,7 +15,7 @@ struct pcap_data { void *dev; }; -extern struct net_user_info pcap_user_info; +extern const struct net_user_info pcap_user_info; extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index 73755f37a8a8..ce9f3733f73e 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -47,7 +47,7 @@ struct connection { struct port_list *port; }; -static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t pipe_interrupt(int irq, void *data) { struct connection *conn = data; int fd; @@ -152,7 +152,7 @@ void port_work_proc(void *unused) DECLARE_WORK(port_work, port_work_proc, NULL); -static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t port_interrupt(int irq, void *data) { struct port_list *port = data; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index f0b0668458b7..bc458f57921b 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -524,7 +524,7 @@ static void ubd_handler(void) do_ubd_request(ubd_queue); } -static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) +static irqreturn_t ubd_intr(int irq, void *dev) { ubd_handler(); return(IRQ_HANDLED); diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index 6036ec85895a..a4ce7058e10e 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -21,7 +21,7 @@ struct xterm_wait { int new_fd; }; -static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t xterm_interrupt(int irq, void *data) { struct xterm_wait *xterm = data; int fd; diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h index 356390d1f8b9..461175f8b1d9 100644 --- a/arch/um/include/common-offsets.h +++ b/arch/um/include/common-offsets.h @@ -1,9 +1,16 @@ /* for use by sys-$SUBARCH/kernel-offsets.c */ +DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); +#ifdef CONFIG_MODE_TT +OFFSET(HOST_TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); +#endif + OFFSET(HOST_TASK_REGS, task_struct, thread.regs); OFFSET(HOST_TASK_PID, task_struct, pid); + DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); + DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); @@ -12,6 +19,10 @@ DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); DEFINE_STR(UM_KERN_INFO, KERN_INFO); DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); + DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); DEFINE(UM_ELFCLASS64, ELFCLASS64); + +/* For crypto assembler code. */ +DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); diff --git a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h index c222d56b1494..4f775597fd5f 100644 --- a/arch/um/include/irq_kern.h +++ b/arch/um/include/irq_kern.h @@ -10,12 +10,11 @@ #include "asm/ptrace.h" extern int um_request_irq(unsigned int irq, int fd, int type, - irqreturn_t (*handler)(int, void *, - struct pt_regs *), + irq_handler_t handler, unsigned long irqflags, const char * devname, void *dev_id); extern int init_aio_irq(int irq, char *name, - irqreturn_t (*handler)(int, void *, struct pt_regs *)); + irq_handler_t handler); #endif diff --git a/arch/um/include/longjmp.h b/arch/um/include/longjmp.h index e93c6d3e893b..e860bc5848e0 100644 --- a/arch/um/include/longjmp.h +++ b/arch/um/include/longjmp.h @@ -12,7 +12,8 @@ extern void longjmp(jmp_buf, int); } while(0) #define UML_SETJMP(buf) ({ \ - int n, enable; \ + int n; \ + volatile int enable; \ enable = get_signals(); \ n = setjmp(*buf); \ if(n != 0) \ diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 120ca21a513a..6516f6dca96d 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -201,6 +201,7 @@ extern int os_getpgrp(void); #ifdef UML_CONFIG_MODE_TT extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); +extern void stop(void); #endif extern void init_new_thread_signals(void); extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); diff --git a/arch/um/include/sysdep-i386/kernel-offsets.h b/arch/um/include/sysdep-i386/kernel-offsets.h index 2c13de321f2f..97ec9d894d75 100644 --- a/arch/um/include/sysdep-i386/kernel-offsets.h +++ b/arch/um/include/sysdep-i386/kernel-offsets.h @@ -1,6 +1,7 @@ #include <linux/stddef.h> #include <linux/sched.h> #include <linux/elf.h> +#include <linux/crypto.h> #include <asm/mman.h> #define DEFINE(sym, val) \ @@ -17,9 +18,5 @@ void foo(void) { OFFSET(HOST_TASK_DEBUGREGS, task_struct, thread.arch.debugregs); - DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); -#ifdef CONFIG_MODE_TT - OFFSET(HOST_TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); -#endif #include <common-offsets.h> } diff --git a/arch/um/include/sysdep-x86_64/kernel-offsets.h b/arch/um/include/sysdep-x86_64/kernel-offsets.h index 91d129fb3930..a307237b7964 100644 --- a/arch/um/include/sysdep-x86_64/kernel-offsets.h +++ b/arch/um/include/sysdep-x86_64/kernel-offsets.h @@ -2,6 +2,7 @@ #include <linux/sched.h> #include <linux/time.h> #include <linux/elf.h> +#include <linux/crypto.h> #include <asm/page.h> #include <asm/mman.h> @@ -18,9 +19,5 @@ void foo(void) { - DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); -#ifdef CONFIG_MODE_TT - OFFSET(HOST_TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); -#endif #include <common-offsets.h> } diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index eee97bb81ba5..ef259569fd8c 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -355,14 +355,16 @@ void forward_interrupts(int pid) */ unsigned int do_IRQ(int irq, union uml_pt_regs *regs) { - irq_enter(); - __do_IRQ(irq, (struct pt_regs *)regs); - irq_exit(); - return 1; + struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); + irq_enter(); + __do_IRQ(irq); + irq_exit(); + set_irq_regs(old_regs); + return 1; } int um_request_irq(unsigned int irq, int fd, int type, - irqreturn_t (*handler)(int, void *, struct pt_regs *), + irq_handler_t handler, unsigned long irqflags, const char * devname, void *dev_id) { @@ -423,8 +425,7 @@ void __init init_IRQ(void) } } -int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *, - struct pt_regs *)) +int init_aio_irq(int irq, char *name, irq_handler_t handler) { int fds[2], err; diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 0ad755ceb212..2b0ab438301c 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -17,7 +17,7 @@ /* Protected by sigio_lock() called from write_sigio_workaround */ static int sigio_irq_fd = -1; -static irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused) +static irqreturn_t sigio_interrupt(int irq, void *data) { char c; diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index c17eddcf89b3..2c6d090a2e87 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -60,10 +60,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, #endif *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); - /* This is wrong for the code page, but it doesn't matter since the - * stub is mapped by hand with the correct permissions. - */ - *pte = pte_mkwrite(*pte); + *pte = pte_mkread(*pte); return(0); out_pmd: diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index a92965f8f9cd..2e354b3ca060 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -86,7 +86,7 @@ static inline unsigned long long get_time(void) return nsecs; } -irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs) +irqreturn_t um_timer(int irq, void *dev) { unsigned long long nsecs; unsigned long flags; diff --git a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c index 6c92bbccb49c..ed1abcf4d057 100644 --- a/arch/um/kernel/tt/uaccess_user.c +++ b/arch/um/kernel/tt/uaccess_user.c @@ -4,13 +4,13 @@ * Licensed under the GPL */ -#include <setjmp.h> #include <string.h> #include "user_util.h" #include "uml_uaccess.h" #include "task.h" #include "kern_util.h" #include "os.h" +#include "longjmp.h" int __do_copy_from_user(void *to, const void *from, int n, void **fault_addr, void **fault_catcher) @@ -80,10 +80,10 @@ int __do_strnlen_user(const char *str, unsigned long n, struct tt_regs save = TASK_REGS(get_current())->tt; int ret; unsigned long *faddrp = (unsigned long *)fault_addr; - sigjmp_buf jbuf; + jmp_buf jbuf; *fault_catcher = &jbuf; - if(sigsetjmp(jbuf, 1) == 0) + if(UML_SETJMP(&jbuf) == 0) ret = strlen(str) + 1; else ret = *faddrp - (unsigned long) str; diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c index 5461a065bbb9..3dc3a02d6263 100644 --- a/arch/um/os-Linux/tt.c +++ b/arch/um/os-Linux/tt.c @@ -10,7 +10,6 @@ #include <errno.h> #include <stdarg.h> #include <stdlib.h> -#include <setjmp.h> #include <sys/time.h> #include <sys/ptrace.h> #include <linux/ptrace.h> diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 3f5b1514e8a7..56b8a50e8bc2 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -80,11 +80,18 @@ void setup_machinename(char *machine_out) struct utsname host; uname(&host); -#if defined(UML_CONFIG_UML_X86) && !defined(UML_CONFIG_64BIT) +#ifdef UML_CONFIG_UML_X86 +# ifndef UML_CONFIG_64BIT if (!strcmp(host.machine, "x86_64")) { strcpy(machine_out, "i686"); return; } +# else + if (!strcmp(host.machine, "i686")) { + strcpy(machine_out, "x86_64"); + return; + } +# endif #endif strcpy(machine_out, host.machine); } diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c index 859273808203..12c593607c59 100644 --- a/arch/um/sys-x86_64/ksyms.c +++ b/arch/um/sys-x86_64/ksyms.c @@ -14,6 +14,3 @@ EXPORT_SYMBOL(__up_wakeup); /*XXX: we need them because they would be exported by x86_64 */ EXPORT_SYMBOL(__memcpy); - -/* Networking helper routines. */ -EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c index d18198ed636b..3a7561d4703e 100644 --- a/arch/x86_64/ia32/ptrace32.c +++ b/arch/x86_64/ia32/ptrace32.c @@ -205,9 +205,9 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val) static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) { int ret; - compat_siginfo_t *si32 = (compat_siginfo_t *)compat_ptr(data); + compat_siginfo_t __user *si32 = compat_ptr(data); siginfo_t ssi; - siginfo_t *si = compat_alloc_user_space(sizeof(siginfo_t)); + siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); if (request == PTRACE_SETSIGINFO) { memset(&ssi, 0, sizeof(siginfo_t)); ret = copy_siginfo_from_user32(&ssi, si32); diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index af4a1c71a80d..4d9d5ed942b2 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -913,8 +913,10 @@ void smp_local_timer_interrupt(void) * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ -void smp_apic_timer_interrupt(void) +void smp_apic_timer_interrupt(struct pt_regs *regs) { + struct pt_regs *old_regs = set_irq_regs(regs); + /* * the NMI deadlock-detector uses this. */ @@ -934,6 +936,7 @@ void smp_apic_timer_interrupt(void) irq_enter(); smp_local_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); } /* diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index cdb90e671b88..73d76308b955 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c @@ -63,6 +63,13 @@ static cpumask_t cluster_target_cpus(void) return cpumask_of_cpu(0); } +static cpumask_t cluster_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + static void cluster_send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); @@ -119,6 +126,7 @@ struct genapic apic_cluster = { .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = cluster_target_cpus, + .vector_allocation_domain = cluster_vector_allocation_domain, .apic_id_registered = cluster_apic_id_registered, .init_apic_ldr = cluster_init_apic_ldr, .send_IPI_all = cluster_send_IPI_all, diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 50ad153eaac4..0dfc223c1839 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -22,6 +22,20 @@ static cpumask_t flat_target_cpus(void) return cpu_online_map; } +static cpumask_t flat_vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + /* * Set up the logical destination ID. * @@ -121,6 +135,7 @@ struct genapic apic_flat = { .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, + .vector_allocation_domain = flat_vector_allocation_domain, .apic_id_registered = flat_apic_id_registered, .init_apic_ldr = flat_init_apic_ldr, .send_IPI_all = flat_send_IPI_all, @@ -141,6 +156,14 @@ static cpumask_t physflat_target_cpus(void) return cpumask_of_cpu(0); } +static cpumask_t physflat_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + + static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); @@ -179,6 +202,7 @@ struct genapic apic_physflat = { .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, + .vector_allocation_domain = physflat_vector_allocation_domain, .apic_id_registered = flat_apic_id_registered, .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ .send_IPI_all = physflat_send_IPI_all, diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 91728d9d3472..44b55f833875 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -47,7 +47,7 @@ #include <asm/msidef.h> #include <asm/hypertransport.h> -static int assign_irq_vector(int irq, cpumask_t mask); +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result); #define __apicdebuginit __init @@ -174,12 +174,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) cpus_and(mask, tmp, CPU_MASK_ALL); - vector = assign_irq_vector(irq, mask); + vector = assign_irq_vector(irq, mask, &tmp); if (vector < 0) return; - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); dest = cpu_mask_to_apicid(tmp); /* @@ -188,7 +186,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) dest = SET_APIC_LOGICAL_ID(dest); spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, vector & 0xff); + __target_IO_APIC_irq(irq, dest, vector); set_native_irq_info(irq, mask); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -563,9 +561,45 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 }; +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { + [0] = FIRST_EXTERNAL_VECTOR + 0, + [1] = FIRST_EXTERNAL_VECTOR + 1, + [2] = FIRST_EXTERNAL_VECTOR + 2, + [3] = FIRST_EXTERNAL_VECTOR + 3, + [4] = FIRST_EXTERNAL_VECTOR + 4, + [5] = FIRST_EXTERNAL_VECTOR + 5, + [6] = FIRST_EXTERNAL_VECTOR + 6, + [7] = FIRST_EXTERNAL_VECTOR + 7, + [8] = FIRST_EXTERNAL_VECTOR + 8, + [9] = FIRST_EXTERNAL_VECTOR + 9, + [10] = FIRST_EXTERNAL_VECTOR + 10, + [11] = FIRST_EXTERNAL_VECTOR + 11, + [12] = FIRST_EXTERNAL_VECTOR + 12, + [13] = FIRST_EXTERNAL_VECTOR + 13, + [14] = FIRST_EXTERNAL_VECTOR + 14, + [15] = FIRST_EXTERNAL_VECTOR + 15, +}; + +static cpumask_t irq_domain[NR_IRQ_VECTORS] __read_mostly = { + [0] = CPU_MASK_ALL, + [1] = CPU_MASK_ALL, + [2] = CPU_MASK_ALL, + [3] = CPU_MASK_ALL, + [4] = CPU_MASK_ALL, + [5] = CPU_MASK_ALL, + [6] = CPU_MASK_ALL, + [7] = CPU_MASK_ALL, + [8] = CPU_MASK_ALL, + [9] = CPU_MASK_ALL, + [10] = CPU_MASK_ALL, + [11] = CPU_MASK_ALL, + [12] = CPU_MASK_ALL, + [13] = CPU_MASK_ALL, + [14] = CPU_MASK_ALL, + [15] = CPU_MASK_ALL, +}; -static int __assign_irq_vector(int irq, cpumask_t mask) +static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) { /* * NOTE! The local APIC isn't very good at handling @@ -587,16 +621,24 @@ static int __assign_irq_vector(int irq, cpumask_t mask) BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); - if (IO_APIC_VECTOR(irq) > 0) - old_vector = IO_APIC_VECTOR(irq); - if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) { - return old_vector; + if (irq_vector[irq] > 0) + old_vector = irq_vector[irq]; + if (old_vector > 0) { + cpus_and(*result, irq_domain[irq], mask); + if (!cpus_empty(*result)) + return old_vector; } for_each_cpu_mask(cpu, mask) { + cpumask_t domain; + int first, new_cpu; int vector, offset; - vector = pos[cpu].vector; - offset = pos[cpu].offset; + + domain = vector_allocation_domain(cpu); + first = first_cpu(domain); + + vector = pos[first].vector; + offset = pos[first].offset; next: vector += 8; if (vector >= FIRST_SYSTEM_VECTOR) { @@ -604,35 +646,40 @@ next: offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } - if (unlikely(pos[cpu].vector == vector)) + if (unlikely(pos[first].vector == vector)) continue; if (vector == IA32_SYSCALL_VECTOR) goto next; - if (per_cpu(vector_irq, cpu)[vector] != -1) - goto next; + for_each_cpu_mask(new_cpu, domain) + if (per_cpu(vector_irq, cpu)[vector] != -1) + goto next; /* Found one! */ - pos[cpu].vector = vector; - pos[cpu].offset = offset; + for_each_cpu_mask(new_cpu, domain) { + pos[cpu].vector = vector; + pos[cpu].offset = offset; + } if (old_vector >= 0) { - int old_cpu = old_vector >> 8; - old_vector &= 0xff; - per_cpu(vector_irq, old_cpu)[old_vector] = -1; + int old_cpu; + for_each_cpu_mask(old_cpu, irq_domain[irq]) + per_cpu(vector_irq, old_cpu)[old_vector] = -1; } - per_cpu(vector_irq, cpu)[vector] = irq; - vector |= cpu << 8; - IO_APIC_VECTOR(irq) = vector; + for_each_cpu_mask(new_cpu, domain) + per_cpu(vector_irq, new_cpu)[vector] = irq; + irq_vector[irq] = vector; + irq_domain[irq] = domain; + cpus_and(*result, domain, mask); return vector; } return -ENOSPC; } -static int assign_irq_vector(int irq, cpumask_t mask) +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) { int vector; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - vector = __assign_irq_vector(irq, mask); + vector = __assign_irq_vector(irq, mask, result); spin_unlock_irqrestore(&vector_lock, flags); return vector; } @@ -704,14 +751,12 @@ static void __init setup_IO_APIC_irqs(void) if (IO_APIC_IRQ(irq)) { cpumask_t mask; - vector = assign_irq_vector(irq, TARGET_CPUS); + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); if (vector < 0) continue; - cpus_clear(mask); - cpu_set(vector >> 8, mask); entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); - entry.vector = vector & 0xff; + entry.vector = vector; ioapic_register_intr(irq, vector, IOAPIC_AUTO); if (!apic && (irq < 16)) @@ -1289,7 +1334,7 @@ static inline void init_IO_APIC_traps(void) */ for (irq = 0; irq < NR_IRQS ; irq++) { int tmp = irq; - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -1430,12 +1475,13 @@ static inline void check_timer(void) { int apic1, pin1, apic2, pin2; int vector; + cpumask_t mask; /* * get/set the timer IRQ vector: */ disable_8259A_irq(0); - vector = assign_irq_vector(0, TARGET_CPUS); + vector = assign_irq_vector(0, TARGET_CPUS, &mask); /* * Subtle, code in do_timer_interrupt() expects an AEOI @@ -1667,6 +1713,7 @@ int create_irq(void) int new; int vector = 0; unsigned long flags; + cpumask_t mask; irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); @@ -1675,7 +1722,7 @@ int create_irq(void) continue; if (irq_vector[new] != 0) continue; - vector = __assign_irq_vector(new, TARGET_CPUS); + vector = __assign_irq_vector(new, TARGET_CPUS, &mask); if (likely(vector > 0)) irq = new; break; @@ -1707,13 +1754,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms { int vector; unsigned dest; + cpumask_t tmp; - vector = assign_irq_vector(irq, TARGET_CPUS); + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); if (vector >= 0) { - cpumask_t tmp; - - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); dest = cpu_mask_to_apicid(tmp); msg->address_hi = MSI_ADDR_BASE_HI; @@ -1752,12 +1796,10 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) cpus_and(mask, tmp, CPU_MASK_ALL); - vector = assign_irq_vector(irq, mask); + vector = assign_irq_vector(irq, mask, &tmp); if (vector < 0) return; - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); dest = cpu_mask_to_apicid(tmp); read_msi_msg(irq, &msg); @@ -1844,12 +1886,10 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) cpus_and(mask, tmp, CPU_MASK_ALL); - vector = assign_irq_vector(irq, mask); + vector = assign_irq_vector(irq, mask, &tmp); if (vector < 0) return; - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, vector & 0xff); @@ -1857,7 +1897,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) } #endif -static struct hw_interrupt_type ht_irq_chip = { +static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .mask = mask_ht_irq, .unmask = unmask_ht_irq, @@ -1871,15 +1911,13 @@ static struct hw_interrupt_type ht_irq_chip = { int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { int vector; + cpumask_t tmp; - vector = assign_irq_vector(irq, TARGET_CPUS); + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); if (vector >= 0) { u32 low, high; unsigned dest; - cpumask_t tmp; - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); dest = cpu_mask_to_apicid(tmp); high = HT_IRQ_HIGH_DEST_ID(dest); @@ -1945,13 +1983,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p add_pin_to_irq(irq, ioapic, pin); - vector = assign_irq_vector(irq, TARGET_CPUS); + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); if (vector < 0) return vector; - cpus_clear(mask); - cpu_set(vector >> 8, mask); - /* * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. * Note that we mask (disable) IRQs now -- these get enabled when the diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index b8a407fcd5d5..dff68eb2b787 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -114,16 +114,16 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; - if (unlikely(irq >= NR_IRQS)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __FUNCTION__, irq); - BUG(); - } - #ifdef CONFIG_DEBUG_STACKOVERFLOW stack_overflow_check(regs); #endif - generic_handle_irq(irq); + + if (likely(irq < NR_IRQS)) + generic_handle_irq(irq); + else + printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", + __func__, smp_processor_id(), vector); + irq_exit(); set_irq_regs(old_regs); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 19c72520a868..971dc1181e69 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -406,9 +406,12 @@ void __cpuinit zap_low_mappings(int cpu) #ifndef CONFIG_NUMA void __init paging_init(void) { - unsigned long max_zone_pfns[MAX_NR_ZONES] = {MAX_DMA_PFN, - MAX_DMA32_PFN, - end_pfn}; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; + max_zone_pfns[ZONE_NORMAL] = end_pfn; + memory_present(0, 0, end_pfn); sparse_init(); free_area_init_nodes(max_zone_pfns); diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 829a008bd39b..2ee2e003606c 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -338,9 +338,11 @@ static void __init arch_sparse_init(void) void __init paging_init(void) { int i; - unsigned long max_zone_pfns[MAX_NR_ZONES] = { MAX_DMA_PFN, - MAX_DMA32_PFN, - end_pfn}; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; + max_zone_pfns[ZONE_NORMAL] = end_pfn; arch_sparse_init(); diff --git a/block/elevator.c b/block/elevator.c index 487dd3da8853..8ccd163254b8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -93,21 +93,18 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio) static struct elevator_type *elevator_find(const char *name) { - struct elevator_type *e = NULL; + struct elevator_type *e; struct list_head *entry; list_for_each(entry, &elv_list) { - struct elevator_type *__e; - __e = list_entry(entry, struct elevator_type, list); + e = list_entry(entry, struct elevator_type, list); - if (!strcmp(__e->elevator_name, name)) { - e = __e; - break; - } + if (!strcmp(e->elevator_name, name)) + return e; } - return e; + return NULL; } static void elevator_put(struct elevator_type *e) @@ -1088,7 +1085,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) struct list_head *entry; int len = 0; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&elv_list_lock); list_for_each(entry, &elv_list) { struct elevator_type *__e; @@ -1098,7 +1095,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) else len += sprintf(name+len, "%s ", __e->elevator_name); } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&elv_list_lock); len += sprintf(len+name, "\n"); return len; diff --git a/crypto/serpent.c b/crypto/serpent.c index 465d091cd3ec..2b0a19a44ec5 100644 --- a/crypto/serpent.c +++ b/crypto/serpent.c @@ -364,10 +364,10 @@ static void serpent_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct serpent_ctx *ctx = crypto_tfm_ctx(tfm); const u32 - *k = ctx->expkey, - *s = (const u32 *)src; - u32 *d = (u32 *)dst, - r0, r1, r2, r3, r4; + *k = ctx->expkey; + const __le32 *s = (const __le32 *)src; + __le32 *d = (__le32 *)dst; + u32 r0, r1, r2, r3, r4; /* * Note: The conversions between u8* and u32* might cause trouble @@ -423,10 +423,10 @@ static void serpent_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct serpent_ctx *ctx = crypto_tfm_ctx(tfm); const u32 - *k = ((struct serpent_ctx *)ctx)->expkey, - *s = (const u32 *)src; - u32 *d = (u32 *)dst, - r0, r1, r2, r3, r4; + *k = ((struct serpent_ctx *)ctx)->expkey; + const __le32 *s = (const __le32 *)src; + __le32 *d = (__le32 *)dst; + u32 r0, r1, r2, r3, r4; r0 = le32_to_cpu(s[0]); r1 = le32_to_cpu(s[1]); diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c index a01ce6700bfe..4a9b7bf6f44e 100644 --- a/drivers/acpi/cm_sbs.c +++ b/drivers/acpi/cm_sbs.c @@ -67,7 +67,7 @@ void acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir_param) lock_ac_dir_cnt--; if (lock_ac_dir_cnt == 0 && acpi_ac_dir_param && acpi_ac_dir) { remove_proc_entry(ACPI_AC_CLASS, acpi_root_dir); - acpi_ac_dir = 0; + acpi_ac_dir = NULL; } mutex_unlock(&cm_sbs_mutex); } @@ -99,7 +99,7 @@ void acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir_param) if (lock_battery_dir_cnt == 0 && acpi_battery_dir_param && acpi_battery_dir) { remove_proc_entry(ACPI_BATTERY_CLASS, acpi_root_dir); - acpi_battery_dir = 0; + acpi_battery_dir = NULL; } mutex_unlock(&cm_sbs_mutex); return; diff --git a/drivers/acpi/hardware/hwregs.c b/drivers/acpi/hardware/hwregs.c index 3143f36fcec9..fa58c1edce1e 100644 --- a/drivers/acpi/hardware/hwregs.c +++ b/drivers/acpi/hardware/hwregs.c @@ -665,8 +665,6 @@ acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value) /* * Perform a read first to preserve certain bits (per ACPI spec) - * - * Note: This includes SCI_EN, we never want to change this bit */ status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK, ACPI_REGISTER_PM1_CONTROL, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 77138a39eb04..83728a9457ad 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -870,7 +870,11 @@ static unsigned int ata_id_xfermask(const u16 *id) * the PIO timing number for the maximum. Turn it into * a mask. */ - pio_mask = (2 << (id[ATA_ID_OLD_PIO_MODES] & 0xFF)) - 1 ; + u8 mode = id[ATA_ID_OLD_PIO_MODES] & 0xFF; + if (mode < 5) /* Valid PIO range */ + pio_mask = (2 << mode) - 1; + else + pio_mask = 1; /* But wait.. there's more. Design your standards by * committee and you too can get a free iordy field to diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index b0d0cc41f3e8..7af2a4ba4990 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -164,10 +164,10 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) { int rc = 0; u8 scsi_cmd[MAX_COMMAND_SIZE]; - u8 args[4], *argbuf = NULL; + u8 args[4], *argbuf = NULL, *sensebuf = NULL; int argsize = 0; - struct scsi_sense_hdr sshdr; enum dma_data_direction data_dir; + int cmd_result; if (arg == NULL) return -EINVAL; @@ -175,6 +175,10 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) if (copy_from_user(args, arg, sizeof(args))) return -EFAULT; + sensebuf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO); + if (!sensebuf) + return -ENOMEM; + memset(scsi_cmd, 0, sizeof(scsi_cmd)); if (args[3]) { @@ -191,7 +195,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) data_dir = DMA_FROM_DEVICE; } else { scsi_cmd[1] = (3 << 1); /* Non-data */ - /* scsi_cmd[2] is already 0 -- no off.line, cc, or data xfer */ + scsi_cmd[2] = 0x20; /* cc but no off.line or data xfer */ data_dir = DMA_NONE; } @@ -210,18 +214,46 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ - if (scsi_execute_req(scsidev, scsi_cmd, data_dir, argbuf, argsize, - &sshdr, (10*HZ), 5)) { + cmd_result = scsi_execute(scsidev, scsi_cmd, data_dir, argbuf, argsize, + sensebuf, (10*HZ), 5, 0); + + if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ + u8 *desc = sensebuf + 8; + cmd_result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */ + + /* If we set cc then ATA pass-through will cause a + * check condition even if no error. Filter that. */ + if (cmd_result & SAM_STAT_CHECK_CONDITION) { + struct scsi_sense_hdr sshdr; + scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, + &sshdr); + if (sshdr.sense_key==0 && + sshdr.asc==0 && sshdr.ascq==0) + cmd_result &= ~SAM_STAT_CHECK_CONDITION; + } + + /* Send userspace a few ATA registers (same as drivers/ide) */ + if (sensebuf[0] == 0x72 && /* format is "descriptor" */ + desc[0] == 0x09 ) { /* code is "ATA Descriptor" */ + args[0] = desc[13]; /* status */ + args[1] = desc[3]; /* error */ + args[2] = desc[5]; /* sector count (0:7) */ + if (copy_to_user(arg, args, sizeof(args))) + rc = -EFAULT; + } + } + + + if (cmd_result) { rc = -EIO; goto error; } - /* Need code to retrieve data from check condition? */ - if ((argbuf) && copy_to_user(arg + sizeof(args), argbuf, argsize)) rc = -EFAULT; error: + kfree(sensebuf); kfree(argbuf); return rc; } diff --git a/drivers/ata/pata_qdi.c b/drivers/ata/pata_qdi.c index 7977f471d5e9..2c3cc0ccc606 100644 --- a/drivers/ata/pata_qdi.c +++ b/drivers/ata/pata_qdi.c @@ -141,7 +141,7 @@ static void qdi_data_xfer(struct ata_device *adev, unsigned char *buf, unsigned memcpy(&pad, buf + buflen - slop, slop); outl(le32_to_cpu(pad), ap->ioaddr.data_addr); } else { - pad = cpu_to_le16(inl(ap->ioaddr.data_addr)); + pad = cpu_to_le32(inl(ap->ioaddr.data_addr)); memcpy(buf + buflen - slop, &pad, slop); } } diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index d636ede064aa..72eda5160fad 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -260,6 +260,7 @@ static const struct pci_device_id pdc_ata_pci_tbl[] = { #if 0 { PCI_VDEVICE(PROMISE, 0x3570), board_20771 }, #endif + { PCI_VDEVICE(PROMISE, 0x3577), board_20771 }, { } /* terminate list */ }; @@ -360,7 +361,7 @@ static void pdc_sata_phy_reset(struct ata_port *ap) static void pdc_pata_cbl_detect(struct ata_port *ap) { u8 tmp; - void __iomem *mmio = (void *) ap->ioaddr.cmd_addr + PDC_CTLSTAT + 0x03; + void __iomem *mmio = (void __iomem *) ap->ioaddr.cmd_addr + PDC_CTLSTAT + 0x03; tmp = readb(mmio); diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index ae5edb80ea9a..ca8d99312472 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -349,7 +349,7 @@ static u32 sil_scr_read (struct ata_port *ap, unsigned int sc_reg) static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val) { - void *mmio = (void __iomem *) sil_scr_addr(ap, sc_reg); + void __iomem *mmio = (void __iomem *) sil_scr_addr(ap, sc_reg); if (mmio) writel(val, mmio); } diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c index 84025a2fd5be..db32d15b7fa1 100644 --- a/drivers/ata/sata_svw.c +++ b/drivers/ata/sata_svw.c @@ -177,7 +177,7 @@ static void k2_bmdma_setup_mmio (struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE); u8 dmactl; - void *mmio = (void *) ap->ioaddr.bmdma_addr; + void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr; /* load PRD table addr. */ mb(); /* make sure PRD table writes are visible to controller */ writel(ap->prd_dma, mmio + ATA_DMA_TABLE_OFS); @@ -205,7 +205,7 @@ static void k2_bmdma_setup_mmio (struct ata_queued_cmd *qc) static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - void *mmio = (void *) ap->ioaddr.bmdma_addr; + void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr; u8 dmactl; /* start host DMA transaction */ diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 8ff5c4e50823..323592de047b 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -862,15 +862,10 @@ static inline void interrupts_off (amb_dev * dev) { /********** interrupt handling **********/ static irqreturn_t interrupt_handler(int irq, void *dev_id) { - amb_dev * dev = (amb_dev *) dev_id; + amb_dev * dev = dev_id; PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id); - if (!dev_id) { - PRINTD (DBG_IRQ|DBG_ERR, "irq with NULL dev_id: %d", irq); - return IRQ_NONE; - } - { u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt)); diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 33e9ee47392b..f59349206dd2 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -1389,15 +1389,6 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id) { PRINTD (DBG_FLOW, "interrupt_handler: %p", dev_id); - if (!dev_id) { - PRINTD (DBG_IRQ|DBG_ERR, "irq with NULL dev_id: %d", irq); - return IRQ_NONE; - } - if (irq != dev->irq) { - PRINTD (DBG_IRQ|DBG_ERR, "irq mismatch: %d", irq); - return IRQ_NONE; - } - // definitely for us irq_ok = 0; while ((int_source = rd_regl (dev, INT_SOURCE_REG_OFF) diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 8895f026bea7..267825501dfe 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1892,11 +1892,9 @@ static inline void lanai_int_1(struct lanai_dev *lanai, u32 reason) static irqreturn_t lanai_int(int irq, void *devid) { - struct lanai_dev *lanai = (struct lanai_dev *) devid; + struct lanai_dev *lanai = devid; u32 reason; - (void) irq; /* unused variables */ - #ifdef USE_POWERDOWN /* * If we're powered down we shouldn't be generating any interrupts - diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 3e8ab84b9447..742d07403101 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -5254,7 +5254,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) static irqreturn_t DAC960_GEM_InterruptHandler(int IRQ_Channel, void *DeviceIdentifier) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) DeviceIdentifier; + DAC960_Controller_T *Controller = DeviceIdentifier; void __iomem *ControllerBaseAddress = Controller->BaseAddress; DAC960_V2_StatusMailbox_T *NextStatusMailbox; unsigned long flags; @@ -5295,7 +5295,7 @@ static irqreturn_t DAC960_GEM_InterruptHandler(int IRQ_Channel, static irqreturn_t DAC960_BA_InterruptHandler(int IRQ_Channel, void *DeviceIdentifier) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) DeviceIdentifier; + DAC960_Controller_T *Controller = DeviceIdentifier; void __iomem *ControllerBaseAddress = Controller->BaseAddress; DAC960_V2_StatusMailbox_T *NextStatusMailbox; unsigned long flags; @@ -5337,7 +5337,7 @@ static irqreturn_t DAC960_BA_InterruptHandler(int IRQ_Channel, static irqreturn_t DAC960_LP_InterruptHandler(int IRQ_Channel, void *DeviceIdentifier) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) DeviceIdentifier; + DAC960_Controller_T *Controller = DeviceIdentifier; void __iomem *ControllerBaseAddress = Controller->BaseAddress; DAC960_V2_StatusMailbox_T *NextStatusMailbox; unsigned long flags; @@ -5379,7 +5379,7 @@ static irqreturn_t DAC960_LP_InterruptHandler(int IRQ_Channel, static irqreturn_t DAC960_LA_InterruptHandler(int IRQ_Channel, void *DeviceIdentifier) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) DeviceIdentifier; + DAC960_Controller_T *Controller = DeviceIdentifier; void __iomem *ControllerBaseAddress = Controller->BaseAddress; DAC960_V1_StatusMailbox_T *NextStatusMailbox; unsigned long flags; @@ -5417,7 +5417,7 @@ static irqreturn_t DAC960_LA_InterruptHandler(int IRQ_Channel, static irqreturn_t DAC960_PG_InterruptHandler(int IRQ_Channel, void *DeviceIdentifier) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) DeviceIdentifier; + DAC960_Controller_T *Controller = DeviceIdentifier; void __iomem *ControllerBaseAddress = Controller->BaseAddress; DAC960_V1_StatusMailbox_T *NextStatusMailbox; unsigned long flags; @@ -5455,7 +5455,7 @@ static irqreturn_t DAC960_PG_InterruptHandler(int IRQ_Channel, static irqreturn_t DAC960_PD_InterruptHandler(int IRQ_Channel, void *DeviceIdentifier) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) DeviceIdentifier; + DAC960_Controller_T *Controller = DeviceIdentifier; void __iomem *ControllerBaseAddress = Controller->BaseAddress; unsigned long flags; @@ -5493,7 +5493,7 @@ static irqreturn_t DAC960_PD_InterruptHandler(int IRQ_Channel, static irqreturn_t DAC960_P_InterruptHandler(int IRQ_Channel, void *DeviceIdentifier) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) DeviceIdentifier; + DAC960_Controller_T *Controller = DeviceIdentifier; void __iomem *ControllerBaseAddress = Controller->BaseAddress; unsigned long flags; diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h index cec539e601fe..6148073532b2 100644 --- a/drivers/block/DAC960.h +++ b/drivers/block/DAC960.h @@ -4379,8 +4379,8 @@ static inline void DAC960_P_To_PD_TranslateEnquiry(void *Enquiry) static inline void DAC960_P_To_PD_TranslateDeviceState(void *DeviceState) { memcpy(DeviceState + 2, DeviceState + 3, 1); - memcpy(DeviceState + 4, DeviceState + 5, 2); - memcpy(DeviceState + 6, DeviceState + 8, 4); + memmove(DeviceState + 4, DeviceState + 5, 2); + memmove(DeviceState + 6, DeviceState + 8, 4); } static inline diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 5d254b714509..5d6562171533 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1709,10 +1709,13 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) return get_disk(unit[drive].gendisk); } -int __init amiga_floppy_init(void) +static int __init amiga_floppy_init(void) { int i, ret; + if (!MACH_IS_AMIGA) + return -ENXIO; + if (!AMIGAHW_PRESENT(AMI_FLOPPY)) return -ENXIO; @@ -1809,15 +1812,9 @@ out_blkdev: return ret; } +module_init(amiga_floppy_init); #ifdef MODULE -int init_module(void) -{ - if (!MACH_IS_AMIGA) - return -ENXIO; - return amiga_floppy_init(); -} - #if 0 /* not safe to unload */ void cleanup_module(void) { diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d6bb8da955a2..beab6d2643cb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -295,7 +295,7 @@ fail: * and do_lo_send_write(). */ static int __do_lo_send_write(struct file *file, - u8 __user *buf, const int len, loff_t pos) + u8 *buf, const int len, loff_t pos) { ssize_t bw; mm_segment_t old_fs = get_fs(); @@ -324,7 +324,7 @@ static int do_lo_send_direct_write(struct loop_device *lo, struct bio_vec *bvec, int bsize, loff_t pos, struct page *page) { ssize_t bw = __do_lo_send_write(lo->lo_backing_file, - (u8 __user *)kmap(bvec->bv_page) + bvec->bv_offset, + kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len, pos); kunmap(bvec->bv_page); cond_resched(); @@ -351,7 +351,7 @@ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, bvec->bv_offset, bvec->bv_len, pos >> 9); if (likely(!ret)) return __do_lo_send_write(lo->lo_backing_file, - (u8 __user *)page_address(page), bvec->bv_len, + page_address(page), bvec->bv_len, pos); printk(KERN_ERR "loop: Transfer error at byte offset %llu, " "length %i.\n", (unsigned long long)pos, bvec->bv_len); @@ -1187,7 +1187,7 @@ struct compat_loop_info { * - noinlined to reduce stack space usage in main part of driver */ static noinline int -loop_info64_from_compat(const struct compat_loop_info *arg, +loop_info64_from_compat(const struct compat_loop_info __user *arg, struct loop_info64 *info64) { struct compat_loop_info info; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 10cc38783bdf..0d97b7eb818a 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -48,9 +48,9 @@ #include <linux/blkdev.h> #include <linux/blkpg.h> #include <linux/delay.h> +#include <linux/io.h> #include <asm/system.h> -#include <asm/io.h> #include <asm/uaccess.h> #include <asm/dma.h> diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 82ddbdd7bd4b..7cc2685ca84a 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -329,7 +329,7 @@ static struct kobject *z2_find(dev_t dev, int *part, void *data) static struct request_queue *z2_queue; -int __init +static int __init z2_init(void) { int ret; @@ -370,26 +370,7 @@ err: return ret; } -#if defined(MODULE) - -MODULE_LICENSE("GPL"); - -int -init_module( void ) -{ - int error; - - error = z2_init(); - if ( error == 0 ) - { - printk( KERN_INFO DEVICE_NAME ": loaded as module\n" ); - } - - return error; -} - -void -cleanup_module( void ) +static void __exit z2_exit(void) { int i, j; blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), 256); @@ -425,4 +406,7 @@ cleanup_module( void ) return; } -#endif + +module_init(z2_init); +module_exit(z2_exit); +MODULE_LICENSE("GPL"); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 2a0c50d84fc5..7ea0f48f8fa6 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -703,7 +703,7 @@ static int cdrom_has_defect_mgt(struct cdrom_device_info *cdi) { struct packet_command cgc; char buffer[16]; - __u16 *feature_code; + __be16 *feature_code; int ret; init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ); @@ -716,7 +716,7 @@ static int cdrom_has_defect_mgt(struct cdrom_device_info *cdi) if ((ret = cdi->ops->generic_packet(cdi, &cgc))) return ret; - feature_code = (__u16 *) &buffer[sizeof(struct feature_header)]; + feature_code = (__be16 *) &buffer[sizeof(struct feature_header)]; if (be16_to_cpu(*feature_code) == CDF_HWDM) return 0; @@ -2963,7 +2963,7 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, how much data is available for transfer. buffer[1] is unfortunately ambigious and the only reliable way seem to be to simply skip over the block descriptor... */ - offset = 8 + be16_to_cpu(*(unsigned short *)(buffer+6)); + offset = 8 + be16_to_cpu(*(__be16 *)(buffer+6)); if (offset + 16 > sizeof(buffer)) return -E2BIG; diff --git a/drivers/cdrom/mcdx.c b/drivers/cdrom/mcdx.c index 60e1978ec0ea..f574962f4288 100644 --- a/drivers/cdrom/mcdx.c +++ b/drivers/cdrom/mcdx.c @@ -850,10 +850,6 @@ static irqreturn_t mcdx_intr(int irq, void *dev_id) struct s_drive_stuff *stuffp = dev_id; unsigned char b; - if (stuffp == NULL) { - xwarn("mcdx: no device for intr %d\n", irq); - return IRQ_NONE; - } #ifdef AK2 if (!stuffp->busy && stuffp->pending) stuffp->int_err = 1; diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c index fc944d375be7..54d93f0345e8 100644 --- a/drivers/char/ip2/i2lib.c +++ b/drivers/char/ip2/i2lib.c @@ -1007,7 +1007,7 @@ i2InputAvailable(i2ChanStrPtr pCh) // applications that one cannot break out of. //****************************************************************************** static int -i2Output(i2ChanStrPtr pCh, const char *pSource, int count, int user ) +i2Output(i2ChanStrPtr pCh, const char *pSource, int count) { i2eBordStrPtr pB; unsigned char *pInsert; @@ -1020,7 +1020,7 @@ i2Output(i2ChanStrPtr pCh, const char *pSource, int count, int user ) int bailout = 10; - ip2trace (CHANN, ITRC_OUTPUT, ITRC_ENTER, 2, count, user ); + ip2trace (CHANN, ITRC_OUTPUT, ITRC_ENTER, 2, count, 0 ); // Ensure channel structure seems real if ( !i2Validate ( pCh ) ) @@ -1087,12 +1087,7 @@ i2Output(i2ChanStrPtr pCh, const char *pSource, int count, int user ) DATA_COUNT_OF(pInsert) = amountToMove; // Move the data - if ( user ) { - rc = copy_from_user((char*)(DATA_OF(pInsert)), pSource, - amountToMove ); - } else { - memcpy( (char*)(DATA_OF(pInsert)), pSource, amountToMove ); - } + memcpy( (char*)(DATA_OF(pInsert)), pSource, amountToMove ); // Adjust pointers and indices pSource += amountToMove; pCh->Obuf_char_count += amountToMove; diff --git a/drivers/char/ip2/i2lib.h b/drivers/char/ip2/i2lib.h index 952e113ccd8a..e559e9bac06d 100644 --- a/drivers/char/ip2/i2lib.h +++ b/drivers/char/ip2/i2lib.h @@ -332,7 +332,7 @@ static int i2QueueCommands(int, i2ChanStrPtr, int, int, cmdSyntaxPtr,...); static int i2GetStatus(i2ChanStrPtr, int); static int i2Input(i2ChanStrPtr); static int i2InputFlush(i2ChanStrPtr); -static int i2Output(i2ChanStrPtr, const char *, int, int); +static int i2Output(i2ChanStrPtr, const char *, int); static int i2OutputFree(i2ChanStrPtr); static int i2ServiceBoard(i2eBordStrPtr); static void i2DrainOutput(i2ChanStrPtr, int); diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index 858ba5432c99..a3f32d46d2f8 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -1704,7 +1704,7 @@ ip2_write( PTTY tty, const unsigned char *pData, int count) /* This is the actual move bit. Make sure it does what we need!!!!! */ WRITE_LOCK_IRQSAVE(&pCh->Pbuf_spinlock,flags); - bytesSent = i2Output( pCh, pData, count, 0 ); + bytesSent = i2Output( pCh, pData, count); WRITE_UNLOCK_IRQRESTORE(&pCh->Pbuf_spinlock,flags); ip2trace (CHANN, ITRC_WRITE, ITRC_RETURN, 1, bytesSent ); @@ -1764,7 +1764,7 @@ ip2_flush_chars( PTTY tty ) // // We may need to restart i2Output if it does not fullfill this request // - strip = i2Output( pCh, pCh->Pbuf, pCh->Pbuf_stuff, 0 ); + strip = i2Output( pCh, pCh->Pbuf, pCh->Pbuf_stuff); if ( strip != pCh->Pbuf_stuff ) { memmove( pCh->Pbuf, &pCh->Pbuf[strip], pCh->Pbuf_stuff - strip ); } diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 2455e8d478ac..34a4fd13fa81 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -1928,13 +1928,8 @@ static ssize_t guid_show(struct device *dev, struct device_attribute *attr, (long long) bmc->guid[8]); } -static void -cleanup_bmc_device(struct kref *ref) +static void remove_files(struct bmc_device *bmc) { - struct bmc_device *bmc; - - bmc = container_of(ref, struct bmc_device, refcount); - device_remove_file(&bmc->dev->dev, &bmc->device_id_attr); device_remove_file(&bmc->dev->dev, @@ -1951,12 +1946,23 @@ cleanup_bmc_device(struct kref *ref) &bmc->manufacturer_id_attr); device_remove_file(&bmc->dev->dev, &bmc->product_id_attr); + if (bmc->id.aux_firmware_revision_set) device_remove_file(&bmc->dev->dev, &bmc->aux_firmware_rev_attr); if (bmc->guid_set) device_remove_file(&bmc->dev->dev, &bmc->guid_attr); +} + +static void +cleanup_bmc_device(struct kref *ref) +{ + struct bmc_device *bmc; + + bmc = container_of(ref, struct bmc_device, refcount); + + remove_files(bmc); platform_device_unregister(bmc->dev); kfree(bmc); } @@ -1977,6 +1983,79 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf) mutex_unlock(&ipmidriver_mutex); } +static int create_files(struct bmc_device *bmc) +{ + int err; + + err = device_create_file(&bmc->dev->dev, + &bmc->device_id_attr); + if (err) goto out; + err = device_create_file(&bmc->dev->dev, + &bmc->provides_dev_sdrs_attr); + if (err) goto out_devid; + err = device_create_file(&bmc->dev->dev, + &bmc->revision_attr); + if (err) goto out_sdrs; + err = device_create_file(&bmc->dev->dev, + &bmc->firmware_rev_attr); + if (err) goto out_rev; + err = device_create_file(&bmc->dev->dev, + &bmc->version_attr); + if (err) goto out_firm; + err = device_create_file(&bmc->dev->dev, + &bmc->add_dev_support_attr); + if (err) goto out_version; + err = device_create_file(&bmc->dev->dev, + &bmc->manufacturer_id_attr); + if (err) goto out_add_dev; + err = device_create_file(&bmc->dev->dev, + &bmc->product_id_attr); + if (err) goto out_manu; + if (bmc->id.aux_firmware_revision_set) { + err = device_create_file(&bmc->dev->dev, + &bmc->aux_firmware_rev_attr); + if (err) goto out_prod_id; + } + if (bmc->guid_set) { + err = device_create_file(&bmc->dev->dev, + &bmc->guid_attr); + if (err) goto out_aux_firm; + } + + return 0; + +out_aux_firm: + if (bmc->id.aux_firmware_revision_set) + device_remove_file(&bmc->dev->dev, + &bmc->aux_firmware_rev_attr); +out_prod_id: + device_remove_file(&bmc->dev->dev, + &bmc->product_id_attr); +out_manu: + device_remove_file(&bmc->dev->dev, + &bmc->manufacturer_id_attr); +out_add_dev: + device_remove_file(&bmc->dev->dev, + &bmc->add_dev_support_attr); +out_version: + device_remove_file(&bmc->dev->dev, + &bmc->version_attr); +out_firm: + device_remove_file(&bmc->dev->dev, + &bmc->firmware_rev_attr); +out_rev: + device_remove_file(&bmc->dev->dev, + &bmc->revision_attr); +out_sdrs: + device_remove_file(&bmc->dev->dev, + &bmc->provides_dev_sdrs_attr); +out_devid: + device_remove_file(&bmc->dev->dev, + &bmc->device_id_attr); +out: + return err; +} + static int ipmi_bmc_register(ipmi_smi_t intf) { int rv; @@ -2051,7 +2130,6 @@ static int ipmi_bmc_register(ipmi_smi_t intf) bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO; bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show; - bmc->revision_attr.attr.name = "revision"; bmc->revision_attr.attr.owner = THIS_MODULE; bmc->revision_attr.attr.mode = S_IRUGO; @@ -2093,28 +2171,14 @@ static int ipmi_bmc_register(ipmi_smi_t intf) bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO; bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show; - device_create_file(&bmc->dev->dev, - &bmc->device_id_attr); - device_create_file(&bmc->dev->dev, - &bmc->provides_dev_sdrs_attr); - device_create_file(&bmc->dev->dev, - &bmc->revision_attr); - device_create_file(&bmc->dev->dev, - &bmc->firmware_rev_attr); - device_create_file(&bmc->dev->dev, - &bmc->version_attr); - device_create_file(&bmc->dev->dev, - &bmc->add_dev_support_attr); - device_create_file(&bmc->dev->dev, - &bmc->manufacturer_id_attr); - device_create_file(&bmc->dev->dev, - &bmc->product_id_attr); - if (bmc->id.aux_firmware_revision_set) - device_create_file(&bmc->dev->dev, - &bmc->aux_firmware_rev_attr); - if (bmc->guid_set) - device_create_file(&bmc->dev->dev, - &bmc->guid_attr); + rv = create_files(bmc); + if (rv) { + mutex_lock(&ipmidriver_mutex); + platform_device_unregister(bmc->dev); + mutex_unlock(&ipmidriver_mutex); + + return rv; + } printk(KERN_INFO "ipmi: Found new BMC (man_id: 0x%6.6x, " diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index d6e031542c6b..ffdf9df1a67a 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -686,37 +686,37 @@ static stlibrd_t *stli_allocbrd(void); static void stli_ecpinit(stlibrd_t *brdp); static void stli_ecpenable(stlibrd_t *brdp); static void stli_ecpdisable(stlibrd_t *brdp); -static char *stli_ecpgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_ecpgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_ecpreset(stlibrd_t *brdp); static void stli_ecpintr(stlibrd_t *brdp); static void stli_ecpeiinit(stlibrd_t *brdp); static void stli_ecpeienable(stlibrd_t *brdp); static void stli_ecpeidisable(stlibrd_t *brdp); -static char *stli_ecpeigetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_ecpeigetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_ecpeireset(stlibrd_t *brdp); static void stli_ecpmcenable(stlibrd_t *brdp); static void stli_ecpmcdisable(stlibrd_t *brdp); -static char *stli_ecpmcgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_ecpmcgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_ecpmcreset(stlibrd_t *brdp); static void stli_ecppciinit(stlibrd_t *brdp); -static char *stli_ecppcigetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_ecppcigetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_ecppcireset(stlibrd_t *brdp); static void stli_onbinit(stlibrd_t *brdp); static void stli_onbenable(stlibrd_t *brdp); static void stli_onbdisable(stlibrd_t *brdp); -static char *stli_onbgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_onbgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_onbreset(stlibrd_t *brdp); static void stli_onbeinit(stlibrd_t *brdp); static void stli_onbeenable(stlibrd_t *brdp); static void stli_onbedisable(stlibrd_t *brdp); -static char *stli_onbegetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_onbegetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_onbereset(stlibrd_t *brdp); static void stli_bbyinit(stlibrd_t *brdp); -static char *stli_bbygetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_bbygetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_bbyreset(stlibrd_t *brdp); static void stli_stalinit(stlibrd_t *brdp); -static char *stli_stalgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); +static void __iomem *stli_stalgetmemptr(stlibrd_t *brdp, unsigned long offset, int line); static void stli_stalreset(stlibrd_t *brdp); static stliport_t *stli_getport(int brdnr, int panelnr, int portnr); @@ -1566,7 +1566,7 @@ static void stli_flushchars(struct tty_struct *tty) len = MIN(len, cooksize); count = 0; - shbuf = (char *) EBRDGETMEMPTR(brdp, portp->txoffset); + shbuf = EBRDGETMEMPTR(brdp, portp->txoffset); buf = stli_txcookbuf; while (len > 0) { @@ -2948,9 +2948,9 @@ static void stli_ecpdisable(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_ecpgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_ecpgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { - void *ptr; + void __iomem *ptr; unsigned char val; if (offset > brdp->memsize) { @@ -3022,9 +3022,9 @@ static void stli_ecpeidisable(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_ecpeigetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_ecpeigetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { - void *ptr; + void __iomem *ptr; unsigned char val; if (offset > brdp->memsize) { @@ -3074,9 +3074,9 @@ static void stli_ecpmcdisable(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_ecpmcgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_ecpmcgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { - void *ptr; + void __iomem *ptr; unsigned char val; if (offset > brdp->memsize) { @@ -3119,9 +3119,9 @@ static void stli_ecppciinit(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_ecppcigetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_ecppcigetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { - void *ptr; + void __iomem *ptr; unsigned char val; if (offset > brdp->memsize) { @@ -3185,9 +3185,9 @@ static void stli_onbdisable(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_onbgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_onbgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { - void *ptr; + void __iomem *ptr; if (offset > brdp->memsize) { printk(KERN_ERR "STALLION: shared memory pointer=%x out of " @@ -3250,9 +3250,9 @@ static void stli_onbedisable(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_onbegetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_onbegetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { - void *ptr; + void __iomem *ptr; unsigned char val; if (offset > brdp->memsize) { @@ -3300,9 +3300,9 @@ static void stli_bbyinit(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_bbygetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_bbygetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { - void *ptr; + void __iomem *ptr; unsigned char val; BUG_ON(offset > brdp->memsize); @@ -3337,7 +3337,7 @@ static void stli_stalinit(stlibrd_t *brdp) /*****************************************************************************/ -static char *stli_stalgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) +static void __iomem *stli_stalgetmemptr(stlibrd_t *brdp, unsigned long offset, int line) { BUG_ON(offset > brdp->memsize); return brdp->membase + (offset % STAL_PAGESIZE); @@ -3876,7 +3876,7 @@ static int stli_eisamemprobe(stlibrd_t *brdp) continue; if (brdp->brdtype == BRD_ECPE) { - ecpsigp = (cdkecpsig_t __iomem *) stli_ecpeigetmemptr(brdp, + ecpsigp = stli_ecpeigetmemptr(brdp, CDK_SIGADDR, __LINE__); memcpy_fromio(&ecpsig, ecpsigp, sizeof(cdkecpsig_t)); if (ecpsig.magic == cpu_to_le32(ECP_MAGIC)) @@ -4184,7 +4184,7 @@ static int stli_initbrds(void) static ssize_t stli_memread(struct file *fp, char __user *buf, size_t count, loff_t *offp) { unsigned long flags; - void *memptr; + void __iomem *memptr; stlibrd_t *brdp; int brdnr, size, n; void *p; @@ -4214,7 +4214,7 @@ static ssize_t stli_memread(struct file *fp, char __user *buf, size_t count, lof while (size > 0) { spin_lock_irqsave(&brd_lock, flags); EBRDENABLE(brdp); - memptr = (void *) EBRDGETMEMPTR(brdp, off); + memptr = EBRDGETMEMPTR(brdp, off); n = MIN(size, (brdp->pagesize - (((unsigned long) off) % brdp->pagesize))); n = MIN(n, PAGE_SIZE); memcpy_fromio(p, memptr, n); @@ -4247,7 +4247,7 @@ out: static ssize_t stli_memwrite(struct file *fp, const char __user *buf, size_t count, loff_t *offp) { unsigned long flags; - void *memptr; + void __iomem *memptr; stlibrd_t *brdp; char __user *chbuf; int brdnr, size, n; @@ -4287,7 +4287,7 @@ static ssize_t stli_memwrite(struct file *fp, const char __user *buf, size_t cou } spin_lock_irqsave(&brd_lock, flags); EBRDENABLE(brdp); - memptr = (void *) EBRDGETMEMPTR(brdp, off); + memptr = EBRDGETMEMPTR(brdp, off); memcpy_toio(memptr, p, n); EBRDDISABLE(brdp); spin_unlock_irqrestore(&brd_lock, flags); diff --git a/drivers/char/qtronix.c b/drivers/char/qtronix.c deleted file mode 100644 index 5c9477741a30..000000000000 --- a/drivers/char/qtronix.c +++ /dev/null @@ -1,605 +0,0 @@ -/* - * - * BRIEF MODULE DESCRIPTION - * Qtronix 990P infrared keyboard driver. - * - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * - * The bottom portion of this driver was take from - * pc_keyb.c Please see that file for copyrights. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -/* - * NOTE: - * - * This driver has only been tested with the Consumer IR - * port of the ITE 8172 system controller. - * - * You do not need this driver if you are using the ps/2 or - * USB adapter that the keyboard ships with. You only need - * this driver if your board has a IR port and the keyboard - * data is being sent directly to the IR. In that case, - * you also need some low-level IR support. See it8172_cir.c. - * - */ - -#ifdef CONFIG_QTRONIX_KEYBOARD - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/kernel.h> - -#include <asm/it8172/it8172.h> -#include <asm/it8172/it8172_int.h> -#include <asm/it8172/it8172_cir.h> - -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/tty.h> -#include <linux/mm.h> -#include <linux/signal.h> -#include <linux/init.h> -#include <linux/kbd_ll.h> -#include <linux/delay.h> -#include <linux/poll.h> -#include <linux/miscdevice.h> -#include <linux/slab.h> -#include <linux/kbd_kern.h> -#include <linux/smp_lock.h> -#include <asm/io.h> -#include <linux/pc_keyb.h> - -#include <asm/keyboard.h> -#include <linux/bitops.h> -#include <asm/uaccess.h> -#include <asm/irq.h> -#include <asm/system.h> - -#define leading1 0 -#define leading2 0xF - -#define KBD_CIR_PORT 0 -#define AUX_RECONNECT 170 /* scancode when ps2 device is plugged (back) in */ - -static int data_index; -struct cir_port *cir; -static unsigned char kbdbytes[5]; -static unsigned char cir_data[32]; /* we only need 16 chars */ - -static void kbd_int_handler(int irq, void *dev_id); -static int handle_data(unsigned char *p_data); -static inline void handle_mouse_event(unsigned char scancode); -static inline void handle_keyboard_event(unsigned char scancode, int down); -static int __init psaux_init(void); - -static struct aux_queue *queue; /* Mouse data buffer. */ -static int aux_count = 0; - -/* - * Keys accessed through the 'Fn' key - * The Fn key does not produce a key-up sequence. So, the first - * time the user presses it, it will be key-down event. The key - * stays down until the user presses it again. - */ -#define NUM_FN_KEYS 56 -static unsigned char fn_keys[NUM_FN_KEYS] = { - 0,0,0,0,0,0,0,0, /* 0 7 */ - 8,9,10,93,0,0,0,0, /* 8 15 */ - 0,0,0,0,0,0,0,5, /* 16 23 */ - 6,7,91,0,0,0,0,0, /* 24 31 */ - 0,0,0,0,0,2,3,4, /* 32 39 */ - 92,0,0,0,0,0,0,0, /* 40 47 */ - 0,0,0,0,11,0,94,95 /* 48 55 */ - -}; - -void __init init_qtronix_990P_kbd(void) -{ - int retval; - - cir = (struct cir_port *)kmalloc(sizeof(struct cir_port), GFP_KERNEL); - if (!cir) { - printk("Unable to initialize Qtronix keyboard\n"); - return; - } - - /* - * revisit - * this should be programmable, somehow by the, by the user. - */ - cir->port = KBD_CIR_PORT; - cir->baud_rate = 0x1d; - cir->rdwos = 0; - cir->rxdcr = 0x3; - cir->hcfs = 0; - cir->fifo_tl = 0; - cir->cfq = 0x1d; - cir_port_init(cir); - - retval = request_irq(IT8172_CIR0_IRQ, kbd_int_handler, - (unsigned long )(IRQF_DISABLED|IRQF_SHARED), - (const char *)"Qtronix IR Keyboard", (void *)cir); - - if (retval) { - printk("unable to allocate cir %d irq %d\n", - cir->port, IT8172_CIR0_IRQ); - } -#ifdef CONFIG_PSMOUSE - psaux_init(); -#endif -} - -static inline unsigned char BitReverse(unsigned short key) -{ - unsigned char rkey = 0; - rkey |= (key & 0x1) << 7; - rkey |= (key & 0x2) << 5; - rkey |= (key & 0x4) << 3; - rkey |= (key & 0x8) << 1; - rkey |= (key & 0x10) >> 1; - rkey |= (key & 0x20) >> 3; - rkey |= (key & 0x40) >> 5; - rkey |= (key & 0x80) >> 7; - return rkey; - -} - - -static inline u_int8_t UpperByte(u_int8_t data) -{ - return (data >> 4); -} - - -static inline u_int8_t LowerByte(u_int8_t data) -{ - return (data & 0xF); -} - - -int CheckSumOk(u_int8_t byte1, u_int8_t byte2, - u_int8_t byte3, u_int8_t byte4, u_int8_t byte5) -{ - u_int8_t CheckSum; - - CheckSum = (byte1 & 0x0F) + byte2 + byte3 + byte4 + byte5; - if ( LowerByte(UpperByte(CheckSum) + LowerByte(CheckSum)) != UpperByte(byte1) ) - return 0; - else - return 1; -} - - -static void kbd_int_handler(int irq, void *dev_id) -{ - struct cir_port *cir; - int j; - unsigned char int_status; - - cir = (struct cir_port *)dev_id; - int_status = get_int_status(cir); - if (int_status & 0x4) { - clear_fifo(cir); - return; - } - - while (cir_get_rx_count(cir)) { - - cir_data[data_index] = cir_read_data(cir); - - if (data_index == 0) {/* expecting first byte */ - if (cir_data[data_index] != leading1) { - //printk("!leading byte %x\n", cir_data[data_index]); - set_rx_active(cir); - clear_fifo(cir); - continue; - } - } - if (data_index == 1) { - if ((cir_data[data_index] & 0xf) != leading2) { - set_rx_active(cir); - data_index = 0; /* start over */ - clear_fifo(cir); - continue; - } - } - - if ( (cir_data[data_index] == 0xff)) { /* last byte */ - //printk("data_index %d\n", data_index); - set_rx_active(cir); -#if 0 - for (j=0; j<=data_index; j++) { - printk("rx_data %d: %x\n", j, cir_data[j]); - } -#endif - data_index = 0; - handle_data(cir_data); - return; - } - else if (data_index>16) { - set_rx_active(cir); -#if 0 - printk("warning: data_index %d\n", data_index); - for (j=0; j<=data_index; j++) { - printk("rx_data %d: %x\n", j, cir_data[j]); - } -#endif - data_index = 0; - clear_fifo(cir); - return; - } - data_index++; - } -} - - -#define NUM_KBD_BYTES 5 -static int handle_data(unsigned char *p_data) -{ - u_int32_t bit_bucket; - u_int32_t i, j; - u_int32_t got_bits, next_byte; - int down = 0; - - /* Reorganize the bit stream */ - for (i=0; i<16; i++) - p_data[i] = BitReverse(~p_data[i]); - - /* - * We've already previously checked that p_data[0] - * is equal to leading1 and that (p_data[1] & 0xf) - * is equal to leading2. These twelve bits are the - * leader code. We can now throw them away (the 12 - * bits) and continue parsing the stream. - */ - bit_bucket = p_data[1] << 12; - got_bits = 4; - next_byte = 2; - - /* - * Process four bits at a time - */ - for (i=0; i<NUM_KBD_BYTES; i++) { - - kbdbytes[i]=0; - - for (j=0; j<8; j++) /* 8 bits per byte */ - { - if (got_bits < 4) { - bit_bucket |= (p_data[next_byte++] << (8 - got_bits)); - got_bits += 8; - } - - if ((bit_bucket & 0xF000) == 0x8000) { - /* Convert 1000b to 1 */ - kbdbytes[i] = 0x80 | (kbdbytes[i] >> 1); - got_bits -= 4; - bit_bucket = bit_bucket << 4; - } - else if ((bit_bucket & 0xC000) == 0x8000) { - /* Convert 10b to 0 */ - kbdbytes[i] = kbdbytes[i] >> 1; - got_bits -= 2; - bit_bucket = bit_bucket << 2; - } - else { - /* bad serial stream */ - return 1; - } - - if (next_byte > 16) { - //printk("error: too many bytes\n"); - return 1; - } - } - } - - - if (!CheckSumOk(kbdbytes[0], kbdbytes[1], - kbdbytes[2], kbdbytes[3], kbdbytes[4])) { - //printk("checksum failed\n"); - return 1; - } - - if (kbdbytes[1] & 0x08) { - //printk("m: %x %x %x\n", kbdbytes[1], kbdbytes[2], kbdbytes[3]); - handle_mouse_event(kbdbytes[1]); - handle_mouse_event(kbdbytes[2]); - handle_mouse_event(kbdbytes[3]); - } - else { - if (kbdbytes[2] == 0) down = 1; -#if 0 - if (down) - printk("down %d\n", kbdbytes[3]); - else - printk("up %d\n", kbdbytes[3]); -#endif - handle_keyboard_event(kbdbytes[3], down); - } - return 0; -} - - -DEFINE_SPINLOCK(kbd_controller_lock); -static unsigned char handle_kbd_event(void); - - -int kbd_setkeycode(unsigned int scancode, unsigned int keycode) -{ - printk("kbd_setkeycode scancode %x keycode %x\n", scancode, keycode); - return 0; -} - -int kbd_getkeycode(unsigned int scancode) -{ - return scancode; -} - - -int kbd_translate(unsigned char scancode, unsigned char *keycode, - char raw_mode) -{ - static int prev_scancode = 0; - - if (scancode == 0x00 || scancode == 0xff) { - prev_scancode = 0; - return 0; - } - - /* todo */ - if (!prev_scancode && scancode == 160) { /* Fn key down */ - //printk("Fn key down\n"); - prev_scancode = 160; - return 0; - } - else if (prev_scancode && scancode == 160) { /* Fn key up */ - //printk("Fn key up\n"); - prev_scancode = 0; - return 0; - } - - /* todo */ - if (prev_scancode == 160) { - if (scancode <= NUM_FN_KEYS) { - *keycode = fn_keys[scancode]; - //printk("fn keycode %d\n", *keycode); - } - else - return 0; - } - else if (scancode <= 127) { - *keycode = scancode; - } - else - return 0; - - - return 1; -} - -char kbd_unexpected_up(unsigned char keycode) -{ - //printk("kbd_unexpected_up\n"); - return 0; -} - -static unsigned char kbd_exists = 1; - -static inline void handle_keyboard_event(unsigned char scancode, int down) -{ - kbd_exists = 1; - handle_scancode(scancode, down); - tasklet_schedule(&keyboard_tasklet); -} - - -void kbd_leds(unsigned char leds) -{ -} - -/* dummy */ -void kbd_init_hw(void) -{ -} - - - -static inline void handle_mouse_event(unsigned char scancode) -{ - if(scancode == AUX_RECONNECT){ - queue->head = queue->tail = 0; /* Flush input queue */ - // __aux_write_ack(AUX_ENABLE_DEV); /* ping the mouse :) */ - return; - } - - if (aux_count) { - int head = queue->head; - - queue->buf[head] = scancode; - head = (head + 1) & (AUX_BUF_SIZE-1); - if (head != queue->tail) { - queue->head = head; - kill_fasync(&queue->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&queue->proc_list); - } - } -} - -static unsigned char get_from_queue(void) -{ - unsigned char result; - unsigned long flags; - - spin_lock_irqsave(&kbd_controller_lock, flags); - result = queue->buf[queue->tail]; - queue->tail = (queue->tail + 1) & (AUX_BUF_SIZE-1); - spin_unlock_irqrestore(&kbd_controller_lock, flags); - return result; -} - - -static inline int queue_empty(void) -{ - return queue->head == queue->tail; -} - -static int fasync_aux(int fd, struct file *filp, int on) -{ - int retval; - - //printk("fasync_aux\n"); - retval = fasync_helper(fd, filp, on, &queue->fasync); - if (retval < 0) - return retval; - return 0; -} - - -/* - * Random magic cookie for the aux device - */ -#define AUX_DEV ((void *)queue) - -static int release_aux(struct inode * inode, struct file * file) -{ - fasync_aux(-1, file, 0); - aux_count--; - return 0; -} - -static int open_aux(struct inode * inode, struct file * file) -{ - if (aux_count++) { - return 0; - } - queue->head = queue->tail = 0; /* Flush input queue */ - return 0; -} - -/* - * Put bytes from input queue to buffer. - */ - -static ssize_t read_aux(struct file * file, char * buffer, - size_t count, loff_t *ppos) -{ - DECLARE_WAITQUEUE(wait, current); - ssize_t i = count; - unsigned char c; - - if (queue_empty()) { - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - add_wait_queue(&queue->proc_list, &wait); -repeat: - set_current_state(TASK_INTERRUPTIBLE); - if (queue_empty() && !signal_pending(current)) { - schedule(); - goto repeat; - } - current->state = TASK_RUNNING; - remove_wait_queue(&queue->proc_list, &wait); - } - while (i > 0 && !queue_empty()) { - c = get_from_queue(); - put_user(c, buffer++); - i--; - } - if (count-i) { - struct inode *inode = file->f_dentry->d_inode; - inode->i_atime = current_fs_time(inode->i_sb); - return count-i; - } - if (signal_pending(current)) - return -ERESTARTSYS; - return 0; -} - -/* - * Write to the aux device. - */ - -static ssize_t write_aux(struct file * file, const char * buffer, - size_t count, loff_t *ppos) -{ - /* - * The ITE boards this was tested on did not have the - * transmit wires connected. - */ - return count; -} - -static unsigned int aux_poll(struct file *file, poll_table * wait) -{ - poll_wait(file, &queue->proc_list, wait); - if (!queue_empty()) - return POLLIN | POLLRDNORM; - return 0; -} - -struct file_operations psaux_fops = { - .read = read_aux, - .write = write_aux, - .poll = aux_poll, - .open = open_aux, - .release = release_aux, - .fasync = fasync_aux, -}; - -/* - * Initialize driver. - */ -static struct miscdevice psaux_mouse = { - PSMOUSE_MINOR, "psaux", &psaux_fops -}; - -static int __init psaux_init(void) -{ - int retval; - - retval = misc_register(&psaux_mouse); - if(retval < 0) - return retval; - - queue = (struct aux_queue *) kmalloc(sizeof(*queue), GFP_KERNEL); - if (!queue) { - misc_deregister(&psaux_mouse); - return -ENOMEM; - } - - memset(queue, 0, sizeof(*queue)); - queue->head = queue->tail = 0; - init_waitqueue_head(&queue->proc_list); - - return 0; -} -module_init(init_qtronix_990P_kbd); -#endif diff --git a/drivers/char/rio/func.h b/drivers/char/rio/func.h index 6b039186856d..9e7283bd81a0 100644 --- a/drivers/char/rio/func.h +++ b/drivers/char/rio/func.h @@ -88,7 +88,7 @@ void RIOHostReset(unsigned int, struct DpRam __iomem *, unsigned int); /* riointr.c */ void RIOTxEnable(char *); -void RIOServiceHost(struct rio_info *, struct Host *, int); +void RIOServiceHost(struct rio_info *, struct Host *); int riotproc(struct rio_info *, struct ttystatics *, int, int); /* rioparam.c */ diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index 3bea594600d4..c382df0f82f6 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -368,7 +368,7 @@ static irqreturn_t rio_interrupt(int irq, void *ptr) struct Host *HostP; func_enter(); - HostP = (struct Host *) ptr; /* &p->RIOHosts[(long)ptr]; */ + HostP = ptr; /* &p->RIOHosts[(long)ptr]; */ rio_dprintk(RIO_DEBUG_IFLOW, "rio: enter rio_interrupt (%d/%d)\n", irq, HostP->Ivec); /* AAargh! The order in which to do these things is essential and @@ -402,7 +402,7 @@ static irqreturn_t rio_interrupt(int irq, void *ptr) return IRQ_HANDLED; } - RIOServiceHost(p, HostP, irq); + RIOServiceHost(p, HostP); rio_dprintk(RIO_DEBUG_IFLOW, "riointr() doing host %p type %d\n", ptr, HostP->Type); diff --git a/drivers/char/rio/riointr.c b/drivers/char/rio/riointr.c index 0bd09040a5c0..eeda40c5e189 100644 --- a/drivers/char/rio/riointr.c +++ b/drivers/char/rio/riointr.c @@ -181,7 +181,7 @@ static int RupIntr; static int RxIntr; static int TxIntr; -void RIOServiceHost(struct rio_info *p, struct Host *HostP, int From) +void RIOServiceHost(struct rio_info *p, struct Host *HostP) { rio_spin_lock(&HostP->HostLock); if ((HostP->Flags & RUN_STATE) != RC_RUNNING) { diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c index be68cfb0ae69..5ab32b38f45a 100644 --- a/drivers/char/riscom8.c +++ b/drivers/char/riscom8.c @@ -559,11 +559,10 @@ static irqreturn_t rc_interrupt(int irq, void * dev_id) int handled = 0; bp = IRQ_to_board[irq]; - - if (!bp || !(bp->flags & RC_BOARD_ACTIVE)) { + + if (!(bp->flags & RC_BOARD_ACTIVE)) return IRQ_NONE; - } - + while ((++loop < 16) && ((status = ~(rc_in(bp, RC_BSR))) & (RC_BSR_TOUT | RC_BSR_TINT | RC_BSR_MINT | RC_BSR_RINT))) { diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index abee7a339462..66a7385bc34a 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -35,13 +35,13 @@ * 1.09a Pete Zaitcev: Sun SPARC * 1.09b Jeff Garzik: Modularize, init cleanup * 1.09c Jeff Garzik: SMP cleanup - * 1.10 Paul Barton-Davis: add support for async I/O + * 1.10 Paul Barton-Davis: add support for async I/O * 1.10a Andrea Arcangeli: Alpha updates * 1.10b Andrew Morton: SMP lock fix * 1.10c Cesar Barros: SMP locking fixes and cleanup * 1.10d Paul Gortmaker: delete paranoia check in rtc_exit * 1.10e Maciej W. Rozycki: Handle DECstation's year weirdness. - * 1.11 Takashi Iwai: Kernel access functions + * 1.11 Takashi Iwai: Kernel access functions * rtc_register/rtc_unregister/rtc_control * 1.11a Daniele Bellucci: Audit create_proc_read_entry in rtc_init * 1.12 Venkatesh Pallipadi: Hooks for emulating rtc on HPET base-timer diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c index 6f13f98e3171..461bfe0234c9 100644 --- a/drivers/char/serial167.c +++ b/drivers/char/serial167.c @@ -62,6 +62,7 @@ #include <linux/console.h> #include <linux/module.h> #include <linux/bitops.h> +#include <linux/tty_flip.h> #include <asm/system.h> #include <asm/io.h> @@ -427,8 +428,9 @@ cd2401_rxerr_interrupt(int irq, void *dev_id) overflowing, we still loose the next incoming character. */ - tty_insert_flip_char(tty, data, TTY_NORMAL); - } + if (tty_buffer_request_room(tty, 1) != 0){ + tty_insert_flip_char(tty, data, TTY_FRAME); + } /* These two conditions may imply */ /* a normal read should be done. */ /* else if(data & CyTIMEOUT) */ @@ -437,14 +439,14 @@ cd2401_rxerr_interrupt(int irq, void *dev_id) tty_insert_flip_char(tty, 0, TTY_NORMAL); } }else{ - tty_insert_flip_char(tty, data, TTY_NORMAL); + tty_insert_flip_char(tty, data, TTY_NORMAL); } }else{ /* there was a software buffer overrun and nothing could be done about it!!! */ } } - schedule_delayed_work(&tty->flip.work, 1); + tty_schedule_flip(tty); /* end of service */ base_addr[CyREOIR] = rfoc ? 0 : CyNOTRANS; return IRQ_HANDLED; @@ -635,6 +637,7 @@ cd2401_rx_interrupt(int irq, void *dev_id) char data; int char_count; int save_cnt; + int len; /* determine the channel and change to that context */ channel = (u_short ) (base_addr[CyLICR] >> 2); @@ -667,14 +670,15 @@ cd2401_rx_interrupt(int irq, void *dev_id) info->mon.char_max = char_count; info->mon.char_last = char_count; #endif - while(char_count--){ + len = tty_buffer_request_room(tty, char_count); + while(len--){ data = base_addr[CyRDR]; tty_insert_flip_char(tty, data, TTY_NORMAL); #ifdef CYCLOM_16Y_HACK udelay(10L); #endif } - schedule_delayed_work(&tty->flip.work, 1); + tty_schedule_flip(tty); } /* end of service */ base_addr[CyREOIR] = save_cnt ? 0 : CyNOTRANS; @@ -1422,7 +1426,6 @@ cy_tiocmget(struct tty_struct *tty, struct file *file) volatile unsigned char *base_addr = (u_char *)BASE_ADDR; unsigned long flags; unsigned char status; - unsigned int result; channel = info->line; @@ -1446,7 +1449,6 @@ cy_tiocmset(struct tty_struct *tty, struct file *file, int channel; volatile unsigned char *base_addr = (u_char *)BASE_ADDR; unsigned long flags; - unsigned int arg; channel = info->line; diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index 6022495571ae..d0b88d0e87fd 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -912,7 +912,7 @@ static irqreturn_t sx_interrupt(int irq, void *dev_id) spin_lock_irqsave(&bp->lock, flags); dprintk (SX_DEBUG_FLOW, "enter %s port %d room: %ld\n", __FUNCTION__, port_No(sx_get_port(bp, "INT")), SERIAL_XMIT_SIZE - sx_get_port(bp, "ITN")->xmit_cnt - 1); - if (!bp || !(bp->flags & SX_BOARD_ACTIVE)) { + if (!(bp->flags & SX_BOARD_ACTIVE)) { dprintk (SX_DEBUG_IRQ, "sx: False interrupt. irq %d.\n", irq); spin_unlock_irqrestore(&bp->lock, flags); func_exit(); diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 4c0e08685705..5f49280779fb 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -38,6 +38,7 @@ #include <linux/irq.h> #include <asm/ptrace.h> +#include <asm/irq_regs.h> /* Whether we react on sysrq keys or just ignore them */ int sysrq_enabled = 1; diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index a082a2e34252..6ad2d3bb945c 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -1153,7 +1153,14 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, const struct tpm_vend spin_unlock(&driver_lock); - sysfs_create_group(&dev->kobj, chip->vendor.attr_group); + if (sysfs_create_group(&dev->kobj, chip->vendor.attr_group)) { + list_del(&chip->list); + put_device(dev); + clear_bit(chip->dev_num, dev_mask); + kfree(chip); + kfree(devname); + return NULL; + } chip->bios_dir = tpm_bios_log_setup(devname); diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index ad8ffe49256f..1ab0896070be 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -184,7 +184,9 @@ static int __init init_atmel(void) unsigned long base; struct tpm_chip *chip; - driver_register(&atml_drv); + rc = driver_register(&atml_drv); + if (rc) + return rc; if ((iobase = atmel_get_base_addr(&base, ®ion_size)) == NULL) { rc = -ENODEV; @@ -195,10 +197,8 @@ static int __init init_atmel(void) (atmel_request_region (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1; - - if (IS_ERR - (pdev = - platform_device_register_simple("tpm_atmel", -1, NULL, 0))) { + pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0); + if (IS_ERR(pdev)) { rc = PTR_ERR(pdev); goto err_rel_reg; } diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 26287aace87d..608f73071bef 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -284,7 +284,7 @@ static struct device_driver nsc_drv = { static int __init init_nsc(void) { int rc = 0; - int lo, hi; + int lo, hi, err; int nscAddrBase = TPM_ADDR; struct tpm_chip *chip; unsigned long base; @@ -297,7 +297,9 @@ static int __init init_nsc(void) return -ENODEV; } - driver_register(&nsc_drv); + err = driver_register(&nsc_drv); + if (err) + return err; hi = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_HI); lo = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_LO); diff --git a/drivers/char/viocons.c b/drivers/char/viocons.c index a362ee9c92dd..6d2e314860df 100644 --- a/drivers/char/viocons.c +++ b/drivers/char/viocons.c @@ -947,7 +947,7 @@ static void vioHandleData(struct HvLpEvent *event) */ continue; } else if (vio_sysrq_pressed) { - handle_sysrq(cevent->data[index], NULL, tty); + handle_sysrq(cevent->data[index], tty); vio_sysrq_pressed = 0; /* * continue because we don't want to add diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c index 0cdbaa70cf9f..d0b94dd1af6d 100644 --- a/drivers/char/vme_scc.c +++ b/drivers/char/vme_scc.c @@ -593,7 +593,7 @@ static void scc_enable_tx_interrupts(void *ptr) local_irq_save(flags); SCCmod(INT_AND_DMA_REG, 0xff, IDR_TX_INT_ENAB); /* restart the transmitter */ - scc_tx_int (0, port, 0); + scc_tx_int (0, port); local_irq_restore(flags); } diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c index f3bf1e230bca..0358419a0e48 100644 --- a/drivers/dma/ioatdma.c +++ b/drivers/dma/ioatdma.c @@ -80,7 +80,7 @@ static int enumerate_dma_channels(struct ioat_device *device) static struct ioat_desc_sw *ioat_dma_alloc_descriptor( struct ioat_dma_chan *ioat_chan, - int flags) + gfp_t flags) { struct ioat_dma_descriptor *desc; struct ioat_desc_sw *desc_sw; @@ -686,7 +686,7 @@ static int __devinit ioat_probe(struct pci_dev *pdev, { int err; unsigned long mmio_start, mmio_len; - void *reg_base; + void __iomem *reg_base; struct ioat_device *device; err = pci_enable_device(pdev); diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index a5d3b3644160..62b26a9be4c9 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -44,7 +44,7 @@ extern struct list_head dma_client_list; struct ioat_device { struct pci_dev *pdev; - void *reg_base; + void __iomem *reg_base; struct pci_pool *dma_pool; struct pci_pool *completion_pool; @@ -73,7 +73,7 @@ struct ioat_device { struct ioat_dma_chan { - void *reg_base; + void __iomem *reg_base; dma_cookie_t completed_cookie; unsigned long last_completion; diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c index 3a365e159d89..d944647c82c2 100644 --- a/drivers/eisa/eisa-bus.c +++ b/drivers/eisa/eisa-bus.c @@ -226,14 +226,26 @@ static int __init eisa_init_device (struct eisa_root_device *root, static int __init eisa_register_device (struct eisa_device *edev) { - if (device_register (&edev->dev)) - return -1; + int rc = device_register (&edev->dev); + if (rc) + return rc; - device_create_file (&edev->dev, &dev_attr_signature); - device_create_file (&edev->dev, &dev_attr_enabled); - device_create_file (&edev->dev, &dev_attr_modalias); + rc = device_create_file (&edev->dev, &dev_attr_signature); + if (rc) goto err_devreg; + rc = device_create_file (&edev->dev, &dev_attr_enabled); + if (rc) goto err_sig; + rc = device_create_file (&edev->dev, &dev_attr_modalias); + if (rc) goto err_enab; return 0; + +err_enab: + device_remove_file (&edev->dev, &dev_attr_enabled); +err_sig: + device_remove_file (&edev->dev, &dev_attr_signature); +err_devreg: + device_unregister(&edev->dev); + return rc; } static int __init eisa_request_resources (struct eisa_root_device *root, diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 339f405ff708..8bcb58cd4ac0 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -559,7 +559,7 @@ static int __devinit dcdbas_probe(struct platform_device *dev) while (--i >= 0) sysfs_remove_bin_file(&dev->dev.kobj, dcdbas_bin_attrs[i]); - sysfs_create_group(&dev->dev.kobj, &dcdbas_attr_group); + sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group); return error; } } diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index fc17599c905e..08b161798443 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -249,7 +249,7 @@ static int packetize_data(void *data, size_t length) if ((rc = create_packet(temp, packet_length))) return rc; - pr_debug("%p:%lu\n", temp, (end - temp)); + pr_debug("%p:%td\n", temp, (end - temp)); temp += packet_length; } @@ -718,14 +718,27 @@ static int __init dcdrbu_init(void) return -EIO; } - sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr); - sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr); - sysfs_create_bin_file(&rbu_device->dev.kobj, + rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr); + if (rc) + goto out_devreg; + rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr); + if (rc) + goto out_data; + rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_packet_size_attr); + if (rc) + goto out_imtype; rbu_data.entry_created = 0; - return rc; + return 0; +out_imtype: + sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr); +out_data: + sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_data_attr); +out_devreg: + platform_device_unregister(rbu_device); + return rc; } static __exit void dcdrbu_exit(void) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 8ebce1c03ad7..5ab5e393b882 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -639,7 +639,12 @@ efivar_create_sysfs_entry(unsigned long variable_name_size, kobject_set_name(&new_efivar->kobj, "%s", short_name); kobj_set_kset_s(new_efivar, vars_subsys); - kobject_register(&new_efivar->kobj); + i = kobject_register(&new_efivar->kobj); + if (i) { + kfree(short_name); + kfree(new_efivar); + return 1; + } kfree(short_name); short_name = NULL; diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c index a508cb962d24..648d55533d87 100644 --- a/drivers/i2c/busses/i2c-powermac.c +++ b/drivers/i2c/busses/i2c-powermac.c @@ -182,9 +182,9 @@ static const struct i2c_algorithm i2c_powermac_algorithm = { }; -static int i2c_powermac_remove(struct device *dev) +static int i2c_powermac_remove(struct platform_device *dev) { - struct i2c_adapter *adapter = dev_get_drvdata(dev); + struct i2c_adapter *adapter = platform_get_drvdata(dev); struct pmac_i2c_bus *bus = i2c_get_adapdata(adapter); int rc; @@ -195,16 +195,16 @@ static int i2c_powermac_remove(struct device *dev) if (rc) printk("i2c-powermac.c: Failed to remove bus %s !\n", adapter->name); - dev_set_drvdata(dev, NULL); + platform_set_drvdata(dev, NULL); kfree(adapter); return 0; } -static int i2c_powermac_probe(struct device *dev) +static int __devexit i2c_powermac_probe(struct platform_device *dev) { - struct pmac_i2c_bus *bus = dev->platform_data; + struct pmac_i2c_bus *bus = dev->dev.platform_data; struct device_node *parent = NULL; struct i2c_adapter *adapter; char name[32]; @@ -246,11 +246,11 @@ static int i2c_powermac_probe(struct device *dev) printk(KERN_ERR "i2c-powermac: can't allocate inteface !\n"); return -ENOMEM; } - dev_set_drvdata(dev, adapter); + platform_set_drvdata(dev, adapter); strcpy(adapter->name, name); adapter->algo = &i2c_powermac_algorithm; i2c_set_adapdata(adapter, bus); - adapter->dev.parent = dev; + adapter->dev.parent = &dev->dev; pmac_i2c_attach_adapter(bus, adapter); rc = i2c_add_adapter(adapter); if (rc) { @@ -265,23 +265,25 @@ static int i2c_powermac_probe(struct device *dev) } -static struct device_driver i2c_powermac_driver = { - .name = "i2c-powermac", - .bus = &platform_bus_type, +static struct platform_driver i2c_powermac_driver = { .probe = i2c_powermac_probe, - .remove = i2c_powermac_remove, + .remove = __devexit_p(i2c_powermac_remove), + .driver = { + .name = "i2c-powermac", + .bus = &platform_bus_type, + }, }; static int __init i2c_powermac_init(void) { - driver_register(&i2c_powermac_driver); + platform_driver_register(&i2c_powermac_driver); return 0; } static void __exit i2c_powermac_cleanup(void) { - driver_unregister(&i2c_powermac_driver); + platform_driver_unregister(&i2c_powermac_driver); } module_init(i2c_powermac_init); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 69bbb6206a00..bddfebdf91d8 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq) struct cdrom_info *cd = drive->driver_data; ide_init_drive_cmd(rq); - rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_type = REQ_TYPE_ATA_PC; rq->rq_disk = cd->disk; } @@ -716,7 +716,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) ide_error(drive, "request sense failure", stat); return 1; - } else if (blk_pc_request(rq)) { + } else if (blk_pc_request(rq) || rq->cmd_type == REQ_TYPE_ATA_PC) { /* All other functions, except for READ. */ unsigned long flags; @@ -2023,7 +2023,8 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block) } info->last_block = block; return action; - } else if (rq->cmd_type == REQ_TYPE_SENSE) { + } else if (rq->cmd_type == REQ_TYPE_SENSE || + rq->cmd_type == REQ_TYPE_ATA_PC) { return cdrom_do_packet_command(drive); } else if (blk_pc_request(rq)) { return cdrom_do_block_pc(drive, rq); diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index 66f6064f4640..09c9e7936b0d 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -4,6 +4,7 @@ * Author: Manish Lachwani, mlachwani@mvista.com * Copyright (C) 2004 MIPS Technologies, Inc. All rights reserved. * Author: Maciej W. Rozycki <macro@mips.com> + * Copyright (c) 2006 Maciej W. Rozycki * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -127,6 +128,7 @@ static int __devinit swarm_ide_probe(struct device *dev) memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports)); hwif->irq = hwif->hw.irq; + probe_hwif_init(hwif); dev_set_drvdata(dev, hwif); return 0; diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c index 965c43659e35..5b77a5bcbf0c 100644 --- a/drivers/ide/pci/generic.c +++ b/drivers/ide/pci/generic.c @@ -237,10 +237,12 @@ static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_devi if (dev->vendor == PCI_VENDOR_ID_JMICRON && PCI_FUNC(dev->devfn) != 1) goto out; - pci_read_config_word(dev, PCI_COMMAND, &command); - if (!(command & PCI_COMMAND_IO)) { - printk(KERN_INFO "Skipping disabled %s IDE controller.\n", d->name); - goto out; + if (dev->vendor != PCI_VENDOR_ID_JMICRON) { + pci_read_config_word(dev, PCI_COMMAND, &command); + if (!(command & PCI_COMMAND_IO)) { + printk(KERN_INFO "Skipping disabled %s IDE controller.\n", d->name); + goto out; + } } ret = ide_setup_pci_device(dev, d); out: diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 3e7974c57443..8e7b83f84485 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -1614,7 +1614,7 @@ static int nodemgr_host_thread(void *__hi) { struct host_info *hi = (struct host_info *)__hi; struct hpsb_host *host = hi->host; - unsigned int g, generation = get_hpsb_generation(host) - 1; + unsigned int g, generation = 0; int i, reset_cycles = 0; /* Setup our device-model entries */ diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f35fcc4c0638..25b1018a476c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -75,6 +75,7 @@ static struct ib_cm { struct rb_root remote_sidr_table; struct idr local_id_table; __be32 random_id_operand; + struct list_head timewait_list; struct workqueue_struct *wq; } cm; @@ -112,6 +113,7 @@ struct cm_work { struct cm_timewait_info { struct cm_work work; /* Must be first. */ + struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; __be64 remote_ca_guid; @@ -647,13 +649,6 @@ static inline int cm_convert_to_ms(int iba_time) static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { - unsigned long flags; - - if (!timewait_info->inserted_remote_id && - !timewait_info->inserted_remote_qp) - return; - - spin_lock_irqsave(&cm.lock, flags); if (timewait_info->inserted_remote_id) { rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); timewait_info->inserted_remote_id = 0; @@ -663,7 +658,6 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); timewait_info->inserted_remote_qp = 0; } - spin_unlock_irqrestore(&cm.lock, flags); } static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) @@ -684,8 +678,12 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; + unsigned long flags; + spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); + list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); + spin_unlock_irqrestore(&cm.lock, flags); /* * The cm_id could be destroyed by the user before we exit timewait. @@ -701,9 +699,13 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) { + unsigned long flags; + cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { + spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); + spin_unlock_irqrestore(&cm.lock, flags); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } @@ -1307,6 +1309,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); + cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); @@ -1315,7 +1318,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); - goto error; + listen_cm_id_priv = NULL; + goto out; } /* Find matching listen request. */ @@ -1323,21 +1327,20 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, req_msg->service_id, req_msg->private_data); if (!listen_cm_id_priv) { + cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); - goto error; + goto out; } atomic_inc(&listen_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irqrestore(&cm.lock, flags); +out: return listen_cm_id_priv; - -error: cm_cleanup_timewait(cm_id_priv->timewait_info); - return NULL; } static int cm_req_handler(struct cm_work *work) @@ -1899,6 +1902,32 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_drep); +static int cm_issue_drep(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_buf *msg = NULL; + struct cm_dreq_msg *dreq_msg; + struct cm_drep_msg *drep_msg; + int ret; + + ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + if (ret) + return ret; + + dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; + drep_msg = (struct cm_drep_msg *) msg->mad; + + cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); + drep_msg->remote_comm_id = dreq_msg->local_comm_id; + drep_msg->local_comm_id = dreq_msg->remote_comm_id; + + ret = ib_post_send_mad(msg, NULL); + if (ret) + cm_free_msg(msg); + + return ret; +} + static int cm_dreq_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; @@ -1910,8 +1939,10 @@ static int cm_dreq_handler(struct cm_work *work) dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); - if (!cm_id_priv) + if (!cm_id_priv) { + cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; + } work->cm_event.private_data = &dreq_msg->private_data; @@ -2601,28 +2632,29 @@ static int cm_timewait_handler(struct cm_work *work) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; - unsigned long flags; int ret; timewait_info = (struct cm_timewait_info *)work; - cm_cleanup_timewait(timewait_info); + spin_lock_irq(&cm.lock); + list_del(&timewait_info->list); + spin_unlock_irq(&cm.lock); cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); if (!cm_id_priv) return -EINVAL; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_TIMEWAIT || cm_id_priv->remote_qpn != timewait_info->remote_qpn) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -3374,6 +3406,7 @@ static int __init ib_cm_init(void) idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); idr_pre_get(&cm.local_id_table, GFP_KERNEL); + INIT_LIST_HEAD(&cm.timewait_list); cm.wq = create_workqueue("ib_cm"); if (!cm.wq) @@ -3391,7 +3424,20 @@ error: static void __exit ib_cm_cleanup(void) { + struct cm_timewait_info *timewait_info, *tmp; + + spin_lock_irq(&cm.lock); + list_for_each_entry(timewait_info, &cm.timewait_list, list) + cancel_delayed_work(&timewait_info->work.work); + spin_unlock_irq(&cm.lock); + destroy_workqueue(cm.wq); + + list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { + list_del(&timewait_info->list); + kfree(timewait_info); + } + ib_unregister_client(&cm_client); idr_destroy(&cm.local_id_table); } diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index 3aae4978e1cb..a31439bd3b67 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -66,7 +66,6 @@ static int c2_convert_cm_status(u32 c2_status) } } -#ifdef DEBUG static const char* to_event_str(int event) { static const char* event_str[] = { @@ -144,7 +143,6 @@ static const char *to_qp_state_str(int state) return "<invalid QP state>"; }; } -#endif void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) { diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c index 12261132b077..5bcf697aa335 100644 --- a/drivers/infiniband/hw/amso1100/c2_qp.c +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -35,6 +35,8 @@ * */ +#include <linux/delay.h> + #include "c2.h" #include "c2_vq.h" #include "c2_status.h" @@ -705,10 +707,8 @@ static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared) * cannot get on the bus and the card and system hang in a * deadlock -- thus the need for this code. [TOT] */ - while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(0); - } + while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) + udelay(10); __raw_writel(C2_HINT_MAKE(mq_index, shared), c2dev->regs + PCI_BAR0_ADAPTER_HINT); @@ -766,6 +766,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, struct c2_dev *c2dev = to_c2dev(ibqp->device); struct c2_qp *qp = to_c2qp(ibqp); union c2wr wr; + unsigned long lock_flags; int err = 0; u32 flags; @@ -881,8 +882,10 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, /* * Post the puppy! */ + spin_lock_irqsave(&qp->lock, lock_flags); err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size); if (err) { + spin_unlock_irqrestore(&qp->lock, lock_flags); break; } @@ -890,6 +893,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, * Enqueue mq index to activity FIFO. */ c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count); + spin_unlock_irqrestore(&qp->lock, lock_flags); ib_wr = ib_wr->next; } @@ -905,6 +909,7 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, struct c2_dev *c2dev = to_c2dev(ibqp->device); struct c2_qp *qp = to_c2qp(ibqp); union c2wr wr; + unsigned long lock_flags; int err = 0; if (qp->state > IB_QPS_RTS) @@ -945,8 +950,10 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, break; } + spin_lock_irqsave(&qp->lock, lock_flags); err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size); if (err) { + spin_unlock_irqrestore(&qp->lock, lock_flags); break; } @@ -954,6 +961,7 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, * Enqueue mq index to activity FIFO */ c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count); + spin_unlock_irqrestore(&qp->lock, lock_flags); ib_wr = ib_wr->next; } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 981fe2eebdfa..fc67f780581b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -179,6 +179,8 @@ static int mthca_query_port(struct ib_device *ibdev, props->max_mtu = out_mad->data[41] & 0xf; props->active_mtu = out_mad->data[36] >> 4; props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; out: kfree(in_mad); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 0f316c87bf64..92a72f521528 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -201,6 +201,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (mthca_is_memfree(dev)) srq->max = roundup_pow_of_two(srq->max + 1); + else + srq->max = srq->max + 1; ds = max(64UL, roundup_pow_of_two(sizeof (struct mthca_next_seg) + @@ -277,7 +279,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, srq->first_free = 0; srq->last_free = srq->max - 1; - attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max; + attr->max_wr = srq->max - 1; attr->max_sge = srq->max_gs; return 0; @@ -413,7 +415,7 @@ int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark); } - srq_attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max; + srq_attr->max_wr = srq->max - 1; srq_attr->max_sge = srq->max_gs; out: diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f426a69d9a43..8bf5e9ec7c95 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -355,6 +355,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, tx_req->skb = skb; addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(addr))) { + ++priv->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } pci_unmap_addr_set(tx_req, mapping, addr); if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 44b9e5be6687..4b09147f438f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -343,29 +343,32 @@ static int srp_send_req(struct srp_target_port *target) */ if (target->io_class == SRP_REV10_IB_IO_CLASS) { memcpy(req->priv.initiator_port_id, - target->srp_host->initiator_port_id + 8, 8); + &target->path.sgid.global.interface_id, 8); memcpy(req->priv.initiator_port_id + 8, - target->srp_host->initiator_port_id, 8); + &target->initiator_ext, 8); memcpy(req->priv.target_port_id, &target->ioc_guid, 8); memcpy(req->priv.target_port_id + 8, &target->id_ext, 8); } else { memcpy(req->priv.initiator_port_id, - target->srp_host->initiator_port_id, 16); + &target->initiator_ext, 8); + memcpy(req->priv.initiator_port_id + 8, + &target->path.sgid.global.interface_id, 8); memcpy(req->priv.target_port_id, &target->id_ext, 8); memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8); } /* * Topspin/Cisco SRP targets will reject our login unless we - * zero out the first 8 bytes of our initiator port ID. The - * second 8 bytes must be our local node GUID, but we always - * use that anyway. + * zero out the first 8 bytes of our initiator port ID and set + * the second 8 bytes to the local node GUID. */ if (topspin_workarounds && !memcmp(&target->ioc_guid, topspin_oui, 3)) { printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround " "activated for target GUID %016llx\n", (unsigned long long) be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); + memcpy(req->priv.initiator_port_id + 8, + &target->srp_host->dev->dev->node_guid, 8); } status = ib_send_cm_req(target->cm_id, &req->param); @@ -1553,6 +1556,7 @@ enum { SRP_OPT_MAX_SECT = 1 << 5, SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, SRP_OPT_IO_CLASS = 1 << 7, + SRP_OPT_INITIATOR_EXT = 1 << 8, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -1569,6 +1573,7 @@ static match_table_t srp_opt_tokens = { { SRP_OPT_MAX_SECT, "max_sect=%d" }, { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, { SRP_OPT_IO_CLASS, "io_class=%x" }, + { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, { SRP_OPT_ERR, NULL } }; @@ -1668,6 +1673,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) target->io_class = token; break; + case SRP_OPT_INITIATOR_EXT: + p = match_strdup(args); + target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); + kfree(p); + break; + default: printk(KERN_WARNING PFX "unknown parameter or missing value " "'%s' in target creation request\n", p); @@ -1708,7 +1719,6 @@ static ssize_t srp_create_target(struct class_device *class_dev, target_host->max_lun = SRP_MAX_LUN; target = host_to_target(target_host); - memset(target, 0, sizeof *target); target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; @@ -1815,9 +1825,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) host->dev = device; host->port = port; - host->initiator_port_id[7] = port; - memcpy(host->initiator_port_id + 8, &device->dev->node_guid, 8); - host->class_dev.class = &srp_class; host->class_dev.dev = device->dev->dma_device; snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d", diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 5b581fb8eb0d..d4e35ef51374 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -91,7 +91,6 @@ struct srp_device { }; struct srp_host { - u8 initiator_port_id[16]; struct srp_device *dev; u8 port; struct class_device class_dev; @@ -122,6 +121,7 @@ struct srp_target_port { __be64 id_ext; __be64 ioc_guid; __be64 service_id; + __be64 initiator_ext; u16 io_class; struct srp_host *srp_host; struct Scsi_Host *scsi_host; diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 679bde34d247..81a333f73010 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -166,7 +166,7 @@ config KEYBOARD_AMIGA config KEYBOARD_HIL_OLD tristate "HP HIL keyboard support (simple driver)" - depends on GSC + depends on GSC || HP300 default y help The "Human Interface Loop" is a older, 8-channel USB-like @@ -183,7 +183,7 @@ config KEYBOARD_HIL_OLD config KEYBOARD_HIL tristate "HP HIL keyboard support" - depends on GSC + depends on GSC || HP300 default y select HP_SDC select HIL_MLC diff --git a/drivers/input/keyboard/hil_kbd.c b/drivers/input/keyboard/hil_kbd.c index c9b0b8978cd8..e774dd31e99b 100644 --- a/drivers/input/keyboard/hil_kbd.c +++ b/drivers/input/keyboard/hil_kbd.c @@ -328,7 +328,7 @@ static int hil_kbd_connect(struct serio *serio, struct serio_driver *drv) kbd->dev->id.vendor = PCI_VENDOR_ID_HP; kbd->dev->id.product = 0x0001; /* TODO: get from kbd->rsc */ kbd->dev->id.version = 0x0100; /* TODO: get from kbd->rsc */ - kbd->dev->dev = &serio->dev; + kbd->dev->cdev.dev = &serio->dev; for (i = 0; i < 128; i++) { set_bit(hil_kbd_set1[i], kbd->dev->keybit); diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index a6dfc7455733..ba0e88c64e1e 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -73,7 +73,7 @@ config INPUT_UINPUT config HP_SDC_RTC tristate "HP SDC Real Time Clock" - depends on GSC + depends on GSC || HP300 select HP_SDC help Say Y here if you want to support the built-in real time clock diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c index 4639537336fc..7b9d1c1da41a 100644 --- a/drivers/input/misc/wistron_btns.c +++ b/drivers/input/misc/wistron_btns.c @@ -17,7 +17,7 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place Suite 330, Boston, MA 02111-1307, USA. */ -#include <asm/io.h> +#include <linux/io.h> #include <linux/dmi.h> #include <linux/init.h> #include <linux/input.h> diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index f15ccf781688..35d998c3e578 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -119,7 +119,7 @@ config MOUSE_VSXXXAA config MOUSE_HIL tristate "HIL pointers (mice etc)." - depends on GSC + depends on GSC || HP300 select HP_SDC select HIL_MLC help diff --git a/drivers/input/mouse/hil_ptr.c b/drivers/input/mouse/hil_ptr.c index 402b057e986e..4f2b503c1ac7 100644 --- a/drivers/input/mouse/hil_ptr.c +++ b/drivers/input/mouse/hil_ptr.c @@ -375,7 +375,7 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver) ptr->dev->id.vendor = PCI_VENDOR_ID_HP; ptr->dev->id.product = 0x0001; /* TODO: get from ptr->rsc */ ptr->dev->id.version = 0x0100; /* TODO: get from ptr->rsc */ - ptr->dev->dev = &serio->dev; + ptr->dev->cdev.dev = &serio->dev; input_register_device(ptr->dev); printk(KERN_INFO "input: %s (%s), ID: %d\n", diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 8cdbfeca5903..adef447f23ea 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -112,7 +112,7 @@ config SERIO_GSCPS2 config HP_SDC tristate "HP System Device Controller i8042 Support" - depends on GSC && SERIO + depends on (GSC || HP300) && SERIO default y ---help--- This option enables support for the "System Device diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c index 081fdc3c7737..74f14e097789 100644 --- a/drivers/input/serio/gscps2.c +++ b/drivers/input/serio/gscps2.c @@ -166,7 +166,7 @@ static inline int gscps2_writeb_output(struct gscps2port *ps2port, u8 data) /* make sure any received data is returned as fast as possible */ /* this is important e.g. when we set the LEDs on the keyboard */ - gscps2_interrupt(0, NULL, NULL); + gscps2_interrupt(0, NULL); return 1; } @@ -306,7 +306,7 @@ static int gscps2_open(struct serio *port) /* enable it */ gscps2_enable(ps2port, ENABLE); - gscps2_interrupt(0, NULL, NULL); + gscps2_interrupt(0, NULL); return 0; } diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index bbbe15e21904..bdfde046b741 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -162,10 +162,10 @@ static void hil_mlc_send_polls(hil_mlc *mlc) { if (did != (p & HIL_PKT_ADDR_MASK) >> 8) { if (drv == NULL || drv->interrupt == NULL) goto skip; - drv->interrupt(serio, 0, 0, NULL); - drv->interrupt(serio, HIL_ERR_INT >> 16, 0, NULL); - drv->interrupt(serio, HIL_PKT_CMD >> 8, 0, NULL); - drv->interrupt(serio, HIL_CMD_POL + cnt, 0, NULL); + drv->interrupt(serio, 0, 0); + drv->interrupt(serio, HIL_ERR_INT >> 16, 0); + drv->interrupt(serio, HIL_PKT_CMD >> 8, 0); + drv->interrupt(serio, HIL_CMD_POL + cnt, 0); skip: did = (p & HIL_PKT_ADDR_MASK) >> 8; serio = did ? mlc->serio[mlc->di_map[did-1]] : NULL; @@ -174,10 +174,10 @@ static void hil_mlc_send_polls(hil_mlc *mlc) { } cnt++; i++; if (drv == NULL || drv->interrupt == NULL) continue; - drv->interrupt(serio, (p >> 24), 0, NULL); - drv->interrupt(serio, (p >> 16) & 0xff, 0, NULL); - drv->interrupt(serio, (p >> 8) & ~HIL_PKT_ADDR_MASK, 0, NULL); - drv->interrupt(serio, p & 0xff, 0, NULL); + drv->interrupt(serio, (p >> 24), 0); + drv->interrupt(serio, (p >> 16) & 0xff, 0); + drv->interrupt(serio, (p >> 8) & ~HIL_PKT_ADDR_MASK, 0); + drv->interrupt(serio, p & 0xff, 0); } } @@ -780,16 +780,16 @@ static int hil_mlc_serio_write(struct serio *serio, unsigned char c) { while ((last != idx) && (*last == 0)) last--; while (idx != last) { - drv->interrupt(serio, 0, 0, NULL); - drv->interrupt(serio, HIL_ERR_INT >> 16, 0, NULL); - drv->interrupt(serio, 0, 0, NULL); - drv->interrupt(serio, *idx, 0, NULL); + drv->interrupt(serio, 0, 0); + drv->interrupt(serio, HIL_ERR_INT >> 16, 0); + drv->interrupt(serio, 0, 0); + drv->interrupt(serio, *idx, 0); idx++; } - drv->interrupt(serio, 0, 0, NULL); - drv->interrupt(serio, HIL_ERR_INT >> 16, 0, NULL); - drv->interrupt(serio, HIL_PKT_CMD >> 8, 0, NULL); - drv->interrupt(serio, *idx, 0, NULL); + drv->interrupt(serio, 0, 0); + drv->interrupt(serio, HIL_ERR_INT >> 16, 0); + drv->interrupt(serio, HIL_PKT_CMD >> 8, 0); + drv->interrupt(serio, *idx, 0); mlc->serio_oidx[map->didx] = 0; mlc->serio_opacket[map->didx] = 0; diff --git a/drivers/isdn/act2000/act2000_isa.c b/drivers/isdn/act2000/act2000_isa.c index 3014495b7ff7..3cac23739344 100644 --- a/drivers/isdn/act2000/act2000_isa.c +++ b/drivers/isdn/act2000/act2000_isa.c @@ -16,8 +16,6 @@ #include "act2000_isa.h" #include "capi.h" -static act2000_card *irq2card_map[16]; - /* * Reset Controller, then try to read the Card's signature. + Return: @@ -65,14 +63,9 @@ act2000_isa_detect(unsigned short portbase) static irqreturn_t act2000_isa_interrupt(int irq, void *dev_id) { - act2000_card *card = irq2card_map[irq]; + act2000_card *card = dev_id; u_char istatus; - if (!card) { - printk(KERN_WARNING - "act2000: Spurious interrupt!\n"); - return IRQ_NONE; - } istatus = (inb(ISA_PORT_ISR) & 0x07); if (istatus & ISA_ISR_OUT) { /* RX fifo has data */ @@ -139,17 +132,15 @@ int act2000_isa_config_irq(act2000_card * card, short irq) { if (card->flags & ACT2000_FLAGS_IVALID) { - free_irq(card->irq, NULL); - irq2card_map[card->irq] = NULL; + free_irq(card->irq, card); } card->flags &= ~ACT2000_FLAGS_IVALID; outb(ISA_COR_IRQOFF, ISA_PORT_COR); if (!irq) return 0; - if (!request_irq(irq, &act2000_isa_interrupt, 0, card->regname, NULL)) { + if (!request_irq(irq, &act2000_isa_interrupt, 0, card->regname, card)) { card->irq = irq; - irq2card_map[card->irq] = card; card->flags |= ACT2000_FLAGS_IVALID; printk(KERN_WARNING "act2000: Could not request irq %d\n",irq); @@ -188,10 +179,9 @@ act2000_isa_release(act2000_card * card) unsigned long flags; spin_lock_irqsave(&card->lock, flags); - if (card->flags & ACT2000_FLAGS_IVALID) { - free_irq(card->irq, NULL); - irq2card_map[card->irq] = NULL; - } + if (card->flags & ACT2000_FLAGS_IVALID) + free_irq(card->irq, card); + card->flags &= ~ACT2000_FLAGS_IVALID; if (card->flags & ACT2000_FLAGS_PVALID) release_region(card->port, ISA_REGION); diff --git a/drivers/isdn/pcbit/layer2.c b/drivers/isdn/pcbit/layer2.c index 13e7d219d1c7..937fd2120381 100644 --- a/drivers/isdn/pcbit/layer2.c +++ b/drivers/isdn/pcbit/layer2.c @@ -311,6 +311,7 @@ pcbit_deliver(void *data) dev->read_queue = frame->next; spin_unlock_irqrestore(&dev->lock, flags); + msg = 0; SET_MSG_CPU(msg, 0); SET_MSG_PROC(msg, 0); SET_MSG_CMD(msg, frame->skb->data[2]); diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c index 222ca7c08baa..06c9872e8c6a 100644 --- a/drivers/isdn/sc/init.c +++ b/drivers/isdn/sc/init.c @@ -98,13 +98,14 @@ static int __init sc_init(void) * Confirm the I/O Address with a test */ if(io[b] == 0) { - pr_debug("I/O Address 0x%x is in use.\n"); + pr_debug("I/O Address invalid.\n"); continue; } outb(0x18, io[b] + 0x400 * EXP_PAGE0); if(inb(io[b] + 0x400 * EXP_PAGE0) != 0x18) { - pr_debug("I/O Base 0x%x fails test\n"); + pr_debug("I/O Base 0x%x fails test\n", + io[b] + 0x400 * EXP_PAGE0); continue; } } @@ -158,8 +159,8 @@ static int __init sc_init(void) outb(0xFF, io[b] + RESET_OFFSET); msleep_interruptible(10000); } - pr_debug("RAM Base for board %d is 0x%x, %s probe\n", b, ram[b], - ram[b] == 0 ? "will" : "won't"); + pr_debug("RAM Base for board %d is 0x%lx, %s probe\n", b, + ram[b], ram[b] == 0 ? "will" : "won't"); if(ram[b]) { /* @@ -168,7 +169,7 @@ static int __init sc_init(void) * board model */ if(request_region(ram[b], SRAM_PAGESIZE, "sc test")) { - pr_debug("request_region for RAM base 0x%x succeeded\n", ram[b]); + pr_debug("request_region for RAM base 0x%lx succeeded\n", ram[b]); model = identify_board(ram[b], io[b]); release_region(ram[b], SRAM_PAGESIZE); } @@ -204,7 +205,7 @@ static int __init sc_init(void) * Nope, there was no place in RAM for the * board, or it couldn't be identified */ - pr_debug("Failed to find an adapter at 0x%x\n", ram[b]); + pr_debug("Failed to find an adapter at 0x%lx\n", ram[b]); continue; } @@ -451,7 +452,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) HWConfig_pl hwci; int x; - pr_debug("Attempting to identify adapter @ 0x%x io 0x%x\n", + pr_debug("Attempting to identify adapter @ 0x%lx io 0x%x\n", rambase, iobase); /* @@ -490,7 +491,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) outb(PRI_BASEPG_VAL, pgport); msleep_interruptible(1000); sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%x\n", sig); + pr_debug("Looking for a signature, got 0x%lx\n", sig); if(sig == SIGNATURE) return PRI_BOARD; @@ -500,7 +501,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) outb(BRI_BASEPG_VAL, pgport); msleep_interruptible(1000); sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%x\n", sig); + pr_debug("Looking for a signature, got 0x%lx\n", sig); if(sig == SIGNATURE) return BRI_BOARD; @@ -510,7 +511,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) * Try to spot a card */ sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%x\n", sig); + pr_debug("Looking for a signature, got 0x%lx\n", sig); if(sig != SIGNATURE) return -1; @@ -540,7 +541,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) memcpy_fromio(&rcvmsg, &(dpm->rsp_queue[dpm->rsp_tail]), MSG_LEN); pr_debug("Got HWConfig response, status = 0x%x\n", rcvmsg.rsp_status); memcpy(&hwci, &(rcvmsg.msg_data.HWCresponse), sizeof(HWConfig_pl)); - pr_debug("Hardware Config: Interface: %s, RAM Size: %d, Serial: %s\n" + pr_debug("Hardware Config: Interface: %s, RAM Size: %ld, Serial: %s\n" " Part: %s, Rev: %s\n", hwci.st_u_sense ? "S/T" : "U", hwci.ram_size, hwci.serial_no, hwci.part_no, hwci.rev_no); diff --git a/drivers/isdn/sc/packet.c b/drivers/isdn/sc/packet.c index f50defc38ae5..1e04676b016b 100644 --- a/drivers/isdn/sc/packet.c +++ b/drivers/isdn/sc/packet.c @@ -44,7 +44,7 @@ int sndpkt(int devId, int channel, struct sk_buff *data) return -ENODEV; } - pr_debug("%s: sndpkt: frst = 0x%x nxt = %d f = %d n = %d\n", + pr_debug("%s: sndpkt: frst = 0x%lx nxt = %d f = %d n = %d\n", sc_adapter[card]->devicename, sc_adapter[card]->channel[channel].first_sendbuf, sc_adapter[card]->channel[channel].next_sendbuf, @@ -66,7 +66,7 @@ int sndpkt(int devId, int channel, struct sk_buff *data) ReqLnkWrite.buff_offset = sc_adapter[card]->channel[channel].next_sendbuf * BUFFER_SIZE + sc_adapter[card]->channel[channel].first_sendbuf; ReqLnkWrite.msg_len = data->len; /* sk_buff size */ - pr_debug("%s: writing %d bytes to buffer offset 0x%x\n", + pr_debug("%s: writing %d bytes to buffer offset 0x%lx\n", sc_adapter[card]->devicename, ReqLnkWrite.msg_len, ReqLnkWrite.buff_offset); memcpy_toshmem(card, (char *)ReqLnkWrite.buff_offset, data->data, ReqLnkWrite.msg_len); @@ -74,7 +74,7 @@ int sndpkt(int devId, int channel, struct sk_buff *data) /* * sendmessage */ - pr_debug("%s: sndpkt size=%d, buf_offset=0x%x buf_indx=%d\n", + pr_debug("%s: sndpkt size=%d, buf_offset=0x%lx buf_indx=%d\n", sc_adapter[card]->devicename, ReqLnkWrite.msg_len, ReqLnkWrite.buff_offset, sc_adapter[card]->channel[channel].next_sendbuf); @@ -124,7 +124,7 @@ void rcvpkt(int card, RspMessage *rcvmsg) return; } skb_put(skb, rcvmsg->msg_data.response.msg_len); - pr_debug("%s: getting data from offset: 0x%x\n", + pr_debug("%s: getting data from offset: 0x%lx\n", sc_adapter[card]->devicename, rcvmsg->msg_data.response.buff_offset); memcpy_fromshmem(card, @@ -143,7 +143,7 @@ void rcvpkt(int card, RspMessage *rcvmsg) /* memset_shmem(card, rcvmsg->msg_data.response.buff_offset, 0, BUFFER_SIZE); */ newll.buff_offset = rcvmsg->msg_data.response.buff_offset; newll.msg_len = BUFFER_SIZE; - pr_debug("%s: recycled buffer at offset 0x%x size %d\n", + pr_debug("%s: recycled buffer at offset 0x%lx size %d\n", sc_adapter[card]->devicename, newll.buff_offset, newll.msg_len); sendmessage(card, CEPID, ceReqTypeLnk, ceReqClass1, ceReqLnkRead, @@ -186,7 +186,7 @@ int setup_buffers(int card, int c) sc_adapter[card]->channel[c-1].num_sendbufs = nBuffers / 2; sc_adapter[card]->channel[c-1].free_sendbufs = nBuffers / 2; sc_adapter[card]->channel[c-1].next_sendbuf = 0; - pr_debug("%s: send buffer setup complete: first=0x%x n=%d f=%d, nxt=%d\n", + pr_debug("%s: send buffer setup complete: first=0x%lx n=%d f=%d, nxt=%d\n", sc_adapter[card]->devicename, sc_adapter[card]->channel[c-1].first_sendbuf, sc_adapter[card]->channel[c-1].num_sendbufs, @@ -203,7 +203,7 @@ int setup_buffers(int card, int c) ((sc_adapter[card]->channel[c-1].first_sendbuf + (nBuffers / 2) * buffer_size) + (buffer_size * i)); RcvBuffOffset.msg_len = buffer_size; - pr_debug("%s: adding RcvBuffer #%d offset=0x%x sz=%d bufsz:%d\n", + pr_debug("%s: adding RcvBuffer #%d offset=0x%lx sz=%d bufsz:%d\n", sc_adapter[card]->devicename, i + 1, RcvBuffOffset.buff_offset, RcvBuffOffset.msg_len,buffer_size); diff --git a/drivers/isdn/sc/shmem.c b/drivers/isdn/sc/shmem.c index 24854826ca45..6f58862992db 100644 --- a/drivers/isdn/sc/shmem.c +++ b/drivers/isdn/sc/shmem.c @@ -61,7 +61,7 @@ void memcpy_toshmem(int card, void *dest, const void *src, size_t n) spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); pr_debug("%s: set page to %#x\n",sc_adapter[card]->devicename, ((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE)>>14)|0x80); - pr_debug("%s: copying %d bytes from %#x to %#x\n", + pr_debug("%s: copying %d bytes from %#lx to %#lx\n", sc_adapter[card]->devicename, n, (unsigned long) src, sc_adapter[card]->rambase + ((unsigned long) dest %0x4000)); diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c index 1ffee7aaff20..17ef5d3c01b4 100644 --- a/drivers/macintosh/adb-iop.c +++ b/drivers/macintosh/adb-iop.c @@ -266,7 +266,7 @@ int adb_iop_autopoll(int devs) void adb_iop_poll(void) { if (adb_iop_state == idle) adb_iop_start(); - iop_ism_irq(0, (void *) ADB_IOP, NULL); + iop_ism_irq(0, (void *) ADB_IOP); } int adb_iop_reset_bus(void) diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 57ccc19cbdbf..797cef72258f 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -270,6 +270,6 @@ static void macio_adb_poll(void) local_irq_save(flags); if (in_8(&adb->intr.r) != 0) - macio_adb_interrupt(0, NULL, NULL); + macio_adb_interrupt(0, NULL); local_irq_restore(flags); } diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c index ad4bd579f610..5d88d5b0ad99 100644 --- a/drivers/macintosh/via-macii.c +++ b/drivers/macintosh/via-macii.c @@ -295,7 +295,7 @@ static void macii_poll(void) unsigned long flags; local_irq_save(flags); - if (via[IFR] & SR_INT) macii_interrupt(0, NULL, NULL); + if (via[IFR] & SR_INT) macii_interrupt(0, NULL); local_irq_restore(flags); } diff --git a/drivers/macintosh/via-maciisi.c b/drivers/macintosh/via-maciisi.c index 789ee52086fe..1f0aa5dc9aa5 100644 --- a/drivers/macintosh/via-maciisi.c +++ b/drivers/macintosh/via-maciisi.c @@ -421,7 +421,7 @@ maciisi_poll(void) local_irq_save(flags); if (via[IFR] & SR_INT) { - maciisi_interrupt(0, NULL, NULL); + maciisi_interrupt(0, NULL); } else /* avoid calling this function too quickly in a loop */ udelay(ADB_DELAY); diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c index 98ec915d0409..d9986f3a3fbf 100644 --- a/drivers/macintosh/via-pmu68k.c +++ b/drivers/macintosh/via-pmu68k.c @@ -221,7 +221,7 @@ pmu_init(void) } if (pmu_state == idle) { adb_int_pending = 1; - pmu_interrupt(0, NULL, NULL); + pmu_interrupt(0, NULL); } pmu_poll(); udelay(10); @@ -562,11 +562,11 @@ pmu_poll(void) local_irq_save(flags); if (via1[IFR] & SR_INT) { via1[IFR] = SR_INT; - pmu_interrupt(IRQ_MAC_ADB_SR, NULL, NULL); + pmu_interrupt(IRQ_MAC_ADB_SR, NULL); } if (via1[IFR] & CB1_INT) { via1[IFR] = CB1_INT; - pmu_interrupt(IRQ_MAC_ADB_CL, NULL, NULL); + pmu_interrupt(IRQ_MAC_ADB_CL, NULL); } local_irq_restore(flags); } diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c index ef66bf2778ec..fa4b13f89369 100644 --- a/drivers/macintosh/windfarm_pm112.c +++ b/drivers/macintosh/windfarm_pm112.c @@ -650,24 +650,26 @@ static struct notifier_block pm112_events = { .notifier_call = pm112_wf_notify, }; -static int wf_pm112_probe(struct device *dev) +static int wf_pm112_probe(struct platform_device *dev) { wf_register_client(&pm112_events); return 0; } -static int wf_pm112_remove(struct device *dev) +static int __devexit wf_pm112_remove(struct platform_device *dev) { wf_unregister_client(&pm112_events); /* should release all sensors and controls */ return 0; } -static struct device_driver wf_pm112_driver = { - .name = "windfarm", - .bus = &platform_bus_type, +static struct platform_driver wf_pm112_driver = { .probe = wf_pm112_probe, - .remove = wf_pm112_remove, + .remove = __devexit_p(wf_pm112_remove), + .driver = { + .name = "windfarm", + .bus = &platform_bus_type, + }, }; static int __init wf_pm112_init(void) @@ -683,13 +685,13 @@ static int __init wf_pm112_init(void) ++nr_cores; printk(KERN_INFO "windfarm: initializing for dual-core desktop G5\n"); - driver_register(&wf_pm112_driver); + platform_driver_register(&wf_pm112_driver); return 0; } static void __exit wf_pm112_exit(void) { - driver_unregister(&wf_pm112_driver); + platform_driver_unregister(&wf_pm112_driver); } module_init(wf_pm112_init); diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c index 2ff546e4c92f..2a944851b8e1 100644 --- a/drivers/macintosh/windfarm_pm81.c +++ b/drivers/macintosh/windfarm_pm81.c @@ -131,8 +131,6 @@ static int wf_smu_mach_model; /* machine model id */ -static struct device *wf_smu_dev; - /* Controls & sensors */ static struct wf_sensor *sensor_cpu_power; static struct wf_sensor *sensor_cpu_temp; @@ -717,16 +715,14 @@ static int wf_init_pm(void) return 0; } -static int wf_smu_probe(struct device *ddev) +static int wf_smu_probe(struct platform_device *ddev) { - wf_smu_dev = ddev; - wf_register_client(&wf_smu_events); return 0; } -static int wf_smu_remove(struct device *ddev) +static int __devexit wf_smu_remove(struct platform_device *ddev) { wf_unregister_client(&wf_smu_events); @@ -766,16 +762,16 @@ static int wf_smu_remove(struct device *ddev) if (wf_smu_cpu_fans) kfree(wf_smu_cpu_fans); - wf_smu_dev = NULL; - return 0; } -static struct device_driver wf_smu_driver = { - .name = "windfarm", - .bus = &platform_bus_type, +static struct platform_driver wf_smu_driver = { .probe = wf_smu_probe, - .remove = wf_smu_remove, + .remove = __devexit_p(wf_smu_remove), + .driver = { + .name = "windfarm", + .bus = &platform_bus_type, + }, }; @@ -794,7 +790,7 @@ static int __init wf_smu_init(void) request_module("windfarm_lm75_sensor"); #endif /* MODULE */ - driver_register(&wf_smu_driver); + platform_driver_register(&wf_smu_driver); } return rc; @@ -803,7 +799,7 @@ static int __init wf_smu_init(void) static void __exit wf_smu_exit(void) { - driver_unregister(&wf_smu_driver); + platform_driver_unregister(&wf_smu_driver); } diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c index 59e9ffe37c39..9961a67b4f85 100644 --- a/drivers/macintosh/windfarm_pm91.c +++ b/drivers/macintosh/windfarm_pm91.c @@ -63,8 +63,6 @@ */ #undef HACKED_OVERTEMP -static struct device *wf_smu_dev; - /* Controls & sensors */ static struct wf_sensor *sensor_cpu_power; static struct wf_sensor *sensor_cpu_temp; @@ -641,16 +639,14 @@ static int wf_init_pm(void) return 0; } -static int wf_smu_probe(struct device *ddev) +static int wf_smu_probe(struct platform_device *ddev) { - wf_smu_dev = ddev; - wf_register_client(&wf_smu_events); return 0; } -static int wf_smu_remove(struct device *ddev) +static int __devexit wf_smu_remove(struct platform_device *ddev) { wf_unregister_client(&wf_smu_events); @@ -698,16 +694,16 @@ static int wf_smu_remove(struct device *ddev) if (wf_smu_cpu_fans) kfree(wf_smu_cpu_fans); - wf_smu_dev = NULL; - return 0; } -static struct device_driver wf_smu_driver = { - .name = "windfarm", - .bus = &platform_bus_type, +static struct platform_driver wf_smu_driver = { .probe = wf_smu_probe, - .remove = wf_smu_remove, + .remove = __devexit_p(wf_smu_remove), + .driver = { + .name = "windfarm", + .bus = &platform_bus_type, + }, }; @@ -725,7 +721,7 @@ static int __init wf_smu_init(void) request_module("windfarm_lm75_sensor"); #endif /* MODULE */ - driver_register(&wf_smu_driver); + platform_driver_register(&wf_smu_driver); } return rc; @@ -734,7 +730,7 @@ static int __init wf_smu_init(void) static void __exit wf_smu_exit(void) { - driver_unregister(&wf_smu_driver); + platform_driver_unregister(&wf_smu_driver); } diff --git a/drivers/mca/mca-bus.c b/drivers/mca/mca-bus.c index 09baa43b2599..da862e4632dd 100644 --- a/drivers/mca/mca-bus.c +++ b/drivers/mca/mca-bus.c @@ -100,6 +100,7 @@ static DEVICE_ATTR(pos, S_IRUGO, mca_show_pos, NULL); int __init mca_register_device(int bus, struct mca_device *mca_dev) { struct mca_bus *mca_bus = mca_root_busses[bus]; + int rc; mca_dev->dev.parent = &mca_bus->dev; mca_dev->dev.bus = &mca_bus_type; @@ -108,13 +109,23 @@ int __init mca_register_device(int bus, struct mca_device *mca_dev) mca_dev->dev.dma_mask = &mca_dev->dma_mask; mca_dev->dev.coherent_dma_mask = mca_dev->dma_mask; - if (device_register(&mca_dev->dev)) - return 0; + rc = device_register(&mca_dev->dev); + if (rc) + goto err_out; - device_create_file(&mca_dev->dev, &dev_attr_id); - device_create_file(&mca_dev->dev, &dev_attr_pos); + rc = device_create_file(&mca_dev->dev, &dev_attr_id); + if (rc) goto err_out_devreg; + rc = device_create_file(&mca_dev->dev, &dev_attr_pos); + if (rc) goto err_out_id; return 1; + +err_out_id: + device_remove_file(&mca_dev->dev, &dev_attr_id); +err_out_devreg: + device_unregister(&mca_dev->dev); +err_out: + return 0; } /* */ @@ -130,13 +141,16 @@ struct mca_bus * __devinit mca_attach_bus(int bus) return NULL; } - mca_bus = kmalloc(sizeof(struct mca_bus), GFP_KERNEL); + mca_bus = kzalloc(sizeof(struct mca_bus), GFP_KERNEL); if (!mca_bus) return NULL; - memset(mca_bus, 0, sizeof(struct mca_bus)); + sprintf(mca_bus->dev.bus_id,"mca%d",bus); sprintf(mca_bus->name,"Host %s MCA Bridge", bus ? "Secondary" : "Primary"); - device_register(&mca_bus->dev); + if (device_register(&mca_bus->dev)) { + kfree(mca_bus); + return NULL; + } mca_root_busses[bus] = mca_bus; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 8e67634e79a0..d47d38ac71b1 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1413,7 +1413,7 @@ int bitmap_create(mddev_t *mddev) int err; sector_t start; - BUG_ON(sizeof(bitmap_super_t) != 256); + BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */ return 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index cb8281605be8..57fa64f93e5f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3849,6 +3849,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) } clear_bit(In_sync, &rdev->flags); rdev->desc_nr = -1; + rdev->saved_raid_disk = -1; err = bind_rdev_to_array(rdev, mddev); if (err) goto abort_export; diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index 20df657b70c8..2a461dde480c 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -174,7 +174,7 @@ static void em28xx_config_i2c(struct em28xx *dev) route.input = INPUT(dev->ctl_input)->vmux; route.output = 0; - em28xx_i2c_call_clients(dev, VIDIOC_INT_RESET, 0); + em28xx_i2c_call_clients(dev, VIDIOC_INT_RESET, NULL); em28xx_i2c_call_clients(dev, VIDIOC_INT_S_VIDEO_ROUTING, &route); em28xx_i2c_call_clients(dev, VIDIOC_STREAMON, NULL); diff --git a/drivers/media/video/zoran_device.c b/drivers/media/video/zoran_device.c index d9d5020a2224..168e431d7c71 100644 --- a/drivers/media/video/zoran_device.c +++ b/drivers/media/video/zoran_device.c @@ -1415,7 +1415,7 @@ zoran_irq (int irq, struct zoran *zr; unsigned long flags; - zr = (struct zoran *) dev_id; + zr = dev_id; count = 0; if (zr->testing) { diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c index b1748669e05b..1ba8754e9383 100644 --- a/drivers/misc/tifm_7xx1.c +++ b/drivers/misc/tifm_7xx1.c @@ -48,7 +48,7 @@ static void tifm_7xx1_remove_media(void *adapter) printk(KERN_INFO DRIVER_NAME ": demand removing card from socket %d\n", cnt); sock = fm->sockets[cnt]; - fm->sockets[cnt] = 0; + fm->sockets[cnt] = NULL; fm->remove_mask &= ~(1 << cnt); writel(0x0e00, sock->addr + SOCK_CONTROL); @@ -118,7 +118,7 @@ static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static tifm_media_id tifm_7xx1_toggle_sock_power(char *sock_addr, int is_x2) +static tifm_media_id tifm_7xx1_toggle_sock_power(char __iomem *sock_addr, int is_x2) { unsigned int s_state; int cnt; @@ -163,7 +163,8 @@ static tifm_media_id tifm_7xx1_toggle_sock_power(char *sock_addr, int is_x2) return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7; } -inline static char *tifm_7xx1_sock_addr(char *base_addr, unsigned int sock_num) +inline static char __iomem * +tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num) { return base_addr + ((sock_num + 1) << 10); } @@ -176,7 +177,7 @@ static void tifm_7xx1_insert_media(void *adapter) char *card_name = "xx"; int cnt, ok_to_register; unsigned int insert_mask; - struct tifm_dev *new_sock = 0; + struct tifm_dev *new_sock = NULL; if (!class_device_get(&fm->cdev)) return; @@ -230,7 +231,7 @@ static void tifm_7xx1_insert_media(void *adapter) if (!ok_to_register || device_register(&new_sock->dev)) { spin_lock_irqsave(&fm->lock, flags); - fm->sockets[cnt] = 0; + fm->sockets[cnt] = NULL; spin_unlock_irqrestore(&fm->lock, flags); tifm_free_device(&new_sock->dev); @@ -390,7 +391,7 @@ static void tifm_7xx1_remove(struct pci_dev *dev) tifm_remove_adapter(fm); - pci_set_drvdata(dev, 0); + pci_set_drvdata(dev, NULL); iounmap(fm->addr); pci_intx(dev, 0); diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index cca5f8522469..ee326136d03b 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -157,7 +157,7 @@ struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id) dev->wq = create_singlethread_workqueue(dev->wq_name); if (!dev->wq) { kfree(dev); - return 0; + return NULL; } dev->dev.parent = fm->dev; dev->dev.bus = &tifm_bus_type; diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index c1293f1bda87..f9027c8db792 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -28,6 +28,7 @@ #include <linux/kdev_t.h> #include <linux/blkdev.h> #include <linux/mutex.h> +#include <linux/scatterlist.h> #include <linux/mmc/card.h> #include <linux/mmc/host.h> @@ -154,6 +155,71 @@ static int mmc_blk_prep_rq(struct mmc_queue *mq, struct request *req) return stat; } +static u32 mmc_sd_num_wr_blocks(struct mmc_card *card) +{ + int err; + u32 blocks; + + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_data data; + unsigned int timeout_us; + + struct scatterlist sg; + + memset(&cmd, 0, sizeof(struct mmc_command)); + + cmd.opcode = MMC_APP_CMD; + cmd.arg = card->rca << 16; + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + + err = mmc_wait_for_cmd(card->host, &cmd, 0); + if ((err != MMC_ERR_NONE) || !(cmd.resp[0] & R1_APP_CMD)) + return (u32)-1; + + memset(&cmd, 0, sizeof(struct mmc_command)); + + cmd.opcode = SD_APP_SEND_NUM_WR_BLKS; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; + + memset(&data, 0, sizeof(struct mmc_data)); + + data.timeout_ns = card->csd.tacc_ns * 100; + data.timeout_clks = card->csd.tacc_clks * 100; + + timeout_us = data.timeout_ns / 1000; + timeout_us += data.timeout_clks * 1000 / + (card->host->ios.clock / 1000); + + if (timeout_us > 100000) { + data.timeout_ns = 100000000; + data.timeout_clks = 0; + } + + data.blksz = 4; + data.blocks = 1; + data.flags = MMC_DATA_READ; + data.sg = &sg; + data.sg_len = 1; + + memset(&mrq, 0, sizeof(struct mmc_request)); + + mrq.cmd = &cmd; + mrq.data = &data; + + sg_init_one(&sg, &blocks, 4); + + mmc_wait_for_req(card->host, &mrq); + + if (cmd.error != MMC_ERR_NONE || data.error != MMC_ERR_NONE) + return (u32)-1; + + blocks = ntohl(blocks); + + return blocks; +} + static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) { struct mmc_blk_data *md = mq->data; @@ -184,10 +250,13 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) /* * If the host doesn't support multiple block writes, force - * block writes to single block. + * block writes to single block. SD cards are excepted from + * this rule as they support querying the number of + * successfully written sectors. */ if (rq_data_dir(req) != READ && - !(card->host->caps & MMC_CAP_MULTIWRITE)) + !(card->host->caps & MMC_CAP_MULTIWRITE) && + !mmc_card_sd(card)) brq.data.blocks = 1; if (brq.data.blocks > 1) { @@ -276,24 +345,41 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) return 1; cmd_err: - mmc_card_release_host(card); - ret = 1; - /* - * For writes and where the host claims to support proper - * error reporting, we first ok the successful blocks. + /* + * If this is an SD card and we're writing, we can first + * mark the known good sectors as ok. + * + * If the card is not SD, we can still ok written sectors + * if the controller can do proper error reporting. * * For reads we just fail the entire chunk as that should * be safe in all cases. */ - if (rq_data_dir(req) != READ && - (card->host->caps & MMC_CAP_MULTIWRITE)) { + if (rq_data_dir(req) != READ && mmc_card_sd(card)) { + u32 blocks; + unsigned int bytes; + + blocks = mmc_sd_num_wr_blocks(card); + if (blocks != (u32)-1) { + if (card->csd.write_partial) + bytes = blocks << md->block_bits; + else + bytes = blocks << 9; + spin_lock_irq(&md->lock); + ret = end_that_request_chunk(req, 1, bytes); + spin_unlock_irq(&md->lock); + } + } else if (rq_data_dir(req) != READ && + (card->host->caps & MMC_CAP_MULTIWRITE)) { spin_lock_irq(&md->lock); ret = end_that_request_chunk(req, 1, brq.data.bytes_xfered); spin_unlock_irq(&md->lock); } + mmc_card_release_host(card); + spin_lock_irq(&md->lock); while (ret) { ret = end_that_request_chunk(req, 0, diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c index 6d024342b2fd..9a7d39b7cdbf 100644 --- a/drivers/mmc/sdhci.c +++ b/drivers/mmc/sdhci.c @@ -1329,7 +1329,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) tasklet_init(&host->finish_tasklet, sdhci_tasklet_finish, (unsigned long)host); - setup_timer(&host->timer, sdhci_timeout_timer, (long)host); + setup_timer(&host->timer, sdhci_timeout_timer, (unsigned long)host); ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, host->slot_descr, host); diff --git a/drivers/mmc/tifm_sd.c b/drivers/mmc/tifm_sd.c index 6d23dc08d169..2bacff60913d 100644 --- a/drivers/mmc/tifm_sd.c +++ b/drivers/mmc/tifm_sd.c @@ -501,13 +501,13 @@ static void tifm_sd_end_cmd(void *data) struct tifm_dev *sock = host->dev; struct mmc_host *mmc = tifm_get_drvdata(sock); struct mmc_request *mrq; - struct mmc_data *r_data = 0; + struct mmc_data *r_data = NULL; unsigned long flags; spin_lock_irqsave(&sock->lock, flags); mrq = host->req; - host->req = 0; + host->req = NULL; host->state = IDLE; if (!mrq) { @@ -546,7 +546,7 @@ static void tifm_sd_request_nodma(struct mmc_host *mmc, struct mmc_request *mrq) struct tifm_dev *sock = host->dev; unsigned long flags; struct mmc_data *r_data = mrq->cmd->data; - char *t_buffer = 0; + char *t_buffer = NULL; if (r_data) { t_buffer = kmap(r_data->sg->page); @@ -613,13 +613,13 @@ static void tifm_sd_end_cmd_nodma(void *data) struct tifm_dev *sock = host->dev; struct mmc_host *mmc = tifm_get_drvdata(sock); struct mmc_request *mrq; - struct mmc_data *r_data = 0; + struct mmc_data *r_data = NULL; unsigned long flags; spin_lock_irqsave(&sock->lock, flags); mrq = host->req; - host->req = 0; + host->req = NULL; host->state = IDLE; if (!mrq) { @@ -644,7 +644,7 @@ static void tifm_sd_end_cmd_nodma(void *data) r_data->bytes_xfered += r_data->blksz - readl(sock->addr + SOCK_MMCSD_BLOCK_LEN) + 1; } - host->buffer = 0; + host->buffer = NULL; host->buffer_pos = 0; host->buffer_size = 0; } @@ -895,7 +895,7 @@ static void tifm_sd_remove(struct tifm_dev *sock) sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR); writel(0, sock->addr + SOCK_DMA_FIFO_INT_ENABLE_SET); - tifm_set_drvdata(sock, 0); + tifm_set_drvdata(sock, NULL); mmc_free_host(mmc); } diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index bc7cc71788bc..d1717763f719 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -62,7 +62,7 @@ static int physmap_flash_remove(struct platform_device *dev) } if (info->map.virt != NULL) - iounmap((void *)info->map.virt); + iounmap(info->map.virt); if (info->res != NULL) { release_resource(info->res); diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c index e0a1d386e581..94924d52a9b9 100644 --- a/drivers/mtd/nand/cs553x_nand.c +++ b/drivers/mtd/nand/cs553x_nand.c @@ -249,7 +249,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr) goto out; out_ior: - iounmap((void *)this->IO_ADDR_R); + iounmap(this->IO_ADDR_R); out_mtd: kfree(new_mtd); out: diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 7ad0a54779c4..f791bf026e51 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -912,16 +912,11 @@ el3_start_xmit(struct sk_buff *skb, struct net_device *dev) static irqreturn_t el3_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *)dev_id; + struct net_device *dev = dev_id; struct el3_private *lp; int ioaddr, status; int i = max_interrupt_work; - if (dev == NULL) { - printk ("el3_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } - lp = netdev_priv(dev); spin_lock(&lp->lock); diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c index 1c97271112d0..91849469b4f4 100644 --- a/drivers/net/3c523.c +++ b/drivers/net/3c523.c @@ -902,14 +902,11 @@ static void *alloc_rfa(struct net_device *dev, void *ptr) static irqreturn_t elmc_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; unsigned short stat; struct priv *p; - if (dev == NULL) { - printk(KERN_ERR "elmc-interrupt: irq %d for unknown device.\n", irq); - return IRQ_NONE; - } else if (!netif_running(dev)) { + if (!netif_running(dev)) { /* The 3c523 has this habit of generating interrupts during the reset. I'm not sure if the ni52 has this same problem, but it's really annoying if we haven't finished initializing it. I was diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index d516c3225ca4..f4aca5386add 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c @@ -1324,11 +1324,6 @@ static irqreturn_t mc32_interrupt(int irq, void *dev_id) int rx_event = 0; int tx_event = 0; - if (dev == NULL) { - printk(KERN_WARNING "%s: irq %d for unknown device.\n", cardname, irq); - return IRQ_NONE; - } - ioaddr = dev->base_addr; lp = netdev_priv(dev); diff --git a/drivers/net/7990.c b/drivers/net/7990.c index 8e996b4a34ea..7733697f7776 100644 --- a/drivers/net/7990.c +++ b/drivers/net/7990.c @@ -674,7 +674,7 @@ void lance_poll(struct net_device *dev) WRITERAP(lp, LE_CSR0); WRITERDP(lp, LE_C0_STRT); spin_unlock (&lp->devlock); - lance_interrupt(dev->irq, dev, NULL); + lance_interrupt(dev->irq, dev); } #endif diff --git a/drivers/net/8390.c b/drivers/net/8390.c index fa3442cb1a49..3d1c599ac3cb 100644 --- a/drivers/net/8390.c +++ b/drivers/net/8390.c @@ -406,14 +406,8 @@ irqreturn_t ei_interrupt(int irq, void *dev_id) int interrupts, nr_serviced = 0; struct ei_device *ei_local; - if (dev == NULL) - { - printk ("net_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } - e8390_base = dev->base_addr; - ei_local = (struct ei_device *) netdev_priv(dev); + ei_local = netdev_priv(dev); /* * Protect the irq test too. diff --git a/drivers/net/atp.c b/drivers/net/atp.c index 062f80e20874..2d306fcb7f36 100644 --- a/drivers/net/atp.c +++ b/drivers/net/atp.c @@ -598,17 +598,13 @@ static int atp_send_packet(struct sk_buff *skb, struct net_device *dev) Handle the network interface interrupts. */ static irqreturn_t atp_interrupt(int irq, void *dev_instance) { - struct net_device *dev = (struct net_device *)dev_instance; + struct net_device *dev = dev_instance; struct net_local *lp; long ioaddr; static int num_tx_since_rx; int boguscount = max_interrupt_work; int handled = 0; - if (dev == NULL) { - printk(KERN_ERR "ATP_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } ioaddr = dev->base_addr; lp = netdev_priv(dev); diff --git a/drivers/net/b44.c b/drivers/net/b44.c index b124eee4eb10..1ec217433b4c 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -1706,14 +1706,15 @@ static void __b44_set_rx_mode(struct net_device *dev) __b44_set_mac_addr(bp); - if (dev->flags & IFF_ALLMULTI) + if ((dev->flags & IFF_ALLMULTI) || + (dev->mc_count > B44_MCAST_TABLE_SIZE)) val |= RXCONFIG_ALLMULTI; else i = __b44_load_mcast(bp, dev); - for (; i < 64; i++) { + for (; i < 64; i++) __b44_cam_write(bp, zero, i); - } + bw32(bp, B44_RXCONFIG, val); val = br32(bp, B44_CAM_CTRL); bw32(bp, B44_CAM_CTRL, val | CAM_CTRL_ENABLE); @@ -2055,7 +2056,7 @@ static int b44_read_eeprom(struct b44 *bp, u8 *data) u16 *ptr = (u16 *) data; for (i = 0; i < 128; i += 2) - ptr[i / 2] = readw(bp->regs + 4096 + i); + ptr[i / 2] = cpu_to_le16(readw(bp->regs + 4096 + i)); return 0; } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index e83bc825f6af..32923162179e 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1433,7 +1433,7 @@ void bond_alb_monitor(struct bonding *bond) * write lock to protect from other code that also * sets the promiscuity. */ - write_lock(&bond->curr_slave_lock); + write_lock_bh(&bond->curr_slave_lock); if (bond_info->primary_is_promisc && (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { @@ -1448,7 +1448,7 @@ void bond_alb_monitor(struct bonding *bond) bond_info->primary_is_promisc = 0; } - write_unlock(&bond->curr_slave_lock); + write_unlock_bh(&bond->curr_slave_lock); if (bond_info->rlb_rebalance) { bond_info->rlb_rebalance = 0; diff --git a/drivers/net/chelsio/cpl5_cmd.h b/drivers/net/chelsio/cpl5_cmd.h index 27925e487bcf..5b357d9e88d6 100644 --- a/drivers/net/chelsio/cpl5_cmd.h +++ b/drivers/net/chelsio/cpl5_cmd.h @@ -108,7 +108,7 @@ struct cpl_tx_pkt_lso { u8 iff:4; #endif u16 vlan; - u32 len; + __be32 len; u32 rsvd2; u8 rsvd3; @@ -119,7 +119,7 @@ struct cpl_tx_pkt_lso { u8 ip_hdr_words:4; u8 tcp_hdr_words:4; #endif - u16 eth_type_mss; + __be16 eth_type_mss; }; struct cpl_rx_pkt { @@ -138,7 +138,7 @@ struct cpl_rx_pkt { u8 iff:4; #endif u16 csum; - u16 vlan; + __be16 vlan; u16 len; }; diff --git a/drivers/net/de600.c b/drivers/net/de600.c index d9b006c9e367..690bb40b353d 100644 --- a/drivers/net/de600.c +++ b/drivers/net/de600.c @@ -265,12 +265,6 @@ static irqreturn_t de600_interrupt(int irq, void *dev_id) int retrig = 0; int boguscount = 0; - /* This might just as well be deleted now, no crummy drivers present :-) */ - if ((dev == NULL) || (DE600_IRQ != irq)) { - printk(KERN_ERR "%s: bogus interrupt %d\n", dev?dev->name:"DE-600", irq); - return IRQ_NONE; - } - spin_lock(&de600_lock); select_nic(); diff --git a/drivers/net/declance.c b/drivers/net/declance.c index e179aa1c1ba0..00e2a8a134d7 100644 --- a/drivers/net/declance.c +++ b/drivers/net/declance.c @@ -696,7 +696,7 @@ out: static irqreturn_t lance_dma_merr_int(const int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; printk("%s: DMA error\n", dev->name); return IRQ_HANDLED; @@ -704,7 +704,7 @@ static irqreturn_t lance_dma_merr_int(const int irq, void *dev_id) static irqreturn_t lance_interrupt(const int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; int csr0; diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c index 6b1234b09fb3..a79520295fd0 100644 --- a/drivers/net/dgrs.c +++ b/drivers/net/dgrs.c @@ -897,8 +897,8 @@ static int dgrs_ioctl(struct net_device *devN, struct ifreq *ifr, int cmd) static irqreturn_t dgrs_intr(int irq, void *dev_id) { - struct net_device *dev0 = (struct net_device *) dev_id; - DGRS_PRIV *priv0 = (DGRS_PRIV *) dev0->priv; + struct net_device *dev0 = dev_id; + DGRS_PRIV *priv0 = dev0->priv; I596_CB *cbp; int cmd; int i; diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 3641f3b4a2cc..615d2b14efa7 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -346,7 +346,7 @@ static void dm9000_timeout(struct net_device *dev) static void dm9000_poll_controller(struct net_device *dev) { disable_irq(dev->irq); - dm9000_interrupt(dev->irq,dev,NULL); + dm9000_interrupt(dev->irq,dev); enable_irq(dev->irq); } #endif diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c index aae454aaa1c6..a4eb0dc99ecf 100644 --- a/drivers/net/eepro.c +++ b/drivers/net/eepro.c @@ -994,16 +994,6 @@ static int eepro_open(struct net_device *dev) return -EAGAIN; } -#ifdef irq2dev_map - if (((irq2dev_map[dev->irq] != 0) - || (irq2dev_map[dev->irq] = dev) == 0) && - (irq2dev_map[dev->irq]!=dev)) { - /* printk("%s: IRQ map wrong\n", dev->name); */ - free_irq(dev->irq, dev); - return -EAGAIN; - } -#endif - /* Initialize the 82595. */ eepro_sw2bank2(ioaddr); /* be CAREFUL, BANK 2 now */ @@ -1198,17 +1188,11 @@ static int eepro_send_packet(struct sk_buff *skb, struct net_device *dev) static irqreturn_t eepro_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *)dev_id; - /* (struct net_device *)(irq2dev_map[irq]);*/ + struct net_device *dev = dev_id; struct eepro_local *lp; int ioaddr, status, boguscount = 20; int handled = 0; - if (dev == NULL) { - printk (KERN_ERR "eepro_interrupt(): irq %d for unknown device.\\n", irq); - return IRQ_NONE; - } - lp = netdev_priv(dev); spin_lock(&lp->lock); @@ -1288,10 +1272,6 @@ static int eepro_close(struct net_device *dev) /* release the interrupt */ free_irq(dev->irq, dev); -#ifdef irq2dev_map - irq2dev_map[dev->irq] = 0; -#endif - /* Update the statistics here. What statistics? */ return 0; diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index 05ca730fe81e..e14be020e562 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -796,13 +796,6 @@ static irqreturn_t eexp_irq(int irq, void *dev_info) unsigned short ioaddr,status,ack_cmd; unsigned short old_read_ptr, old_write_ptr; - if (dev==NULL) - { - printk(KERN_WARNING "eexpress: irq %d for unknown device\n", - irq); - return IRQ_NONE; - } - lp = netdev_priv(dev); ioaddr = dev->base_addr; diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index 23b451a8ae12..b40724fc6b74 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h @@ -39,7 +39,7 @@ #include <asm/io.h> #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0028" +#define DRV_VERSION "EHEA_0034" #define EHEA_MSG_DEFAULT (NETIF_MSG_LINK | NETIF_MSG_TIMER \ | NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -50,6 +50,7 @@ #define EHEA_MAX_ENTRIES_SQ 32767 #define EHEA_MIN_ENTRIES_QP 127 +#define EHEA_SMALL_QUEUES #define EHEA_NUM_TX_QP 1 #ifdef EHEA_SMALL_QUEUES @@ -59,11 +60,11 @@ #define EHEA_DEF_ENTRIES_RQ2 1023 #define EHEA_DEF_ENTRIES_RQ3 1023 #else -#define EHEA_MAX_CQE_COUNT 32000 -#define EHEA_DEF_ENTRIES_SQ 16000 -#define EHEA_DEF_ENTRIES_RQ1 32080 -#define EHEA_DEF_ENTRIES_RQ2 4020 -#define EHEA_DEF_ENTRIES_RQ3 4020 +#define EHEA_MAX_CQE_COUNT 4080 +#define EHEA_DEF_ENTRIES_SQ 4080 +#define EHEA_DEF_ENTRIES_RQ1 8160 +#define EHEA_DEF_ENTRIES_RQ2 2040 +#define EHEA_DEF_ENTRIES_RQ3 2040 #endif #define EHEA_MAX_ENTRIES_EQ 20 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index c6b31775e26b..eb7d44de59ff 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -766,7 +766,7 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) if (EHEA_BMASK_GET(NEQE_PORT_UP, eqe)) { if (!netif_carrier_ok(port->netdev)) { ret = ehea_sense_port_attr( - adapter->port[portnum]); + port); if (ret) { ehea_error("failed resensing port " "attributes"); @@ -818,7 +818,7 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) netif_stop_queue(port->netdev); break; default: - ehea_error("unknown event code %x", ec); + ehea_error("unknown event code %x, eqe=0x%lX", ec, eqe); break; } } @@ -1841,7 +1841,7 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) if (netif_msg_tx_queued(port)) { ehea_info("post swqe on QP %d", pr->qp->init_attr.qp_nr); - ehea_dump(swqe, sizeof(*swqe), "swqe"); + ehea_dump(swqe, 512, "swqe"); } ehea_post_swqe(pr->qp, swqe); diff --git a/drivers/net/ehea/ehea_phyp.c b/drivers/net/ehea/ehea_phyp.c index 4a85aca4c7e9..0b51a8cea077 100644 --- a/drivers/net/ehea/ehea_phyp.c +++ b/drivers/net/ehea/ehea_phyp.c @@ -44,71 +44,99 @@ static inline u16 get_order_of_qentries(u16 queue_entries) #define H_ALL_RES_TYPE_MR 5 #define H_ALL_RES_TYPE_MW 6 -static long ehea_hcall_9arg_9ret(unsigned long opcode, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5, unsigned long arg6, - unsigned long arg7, unsigned long arg8, - unsigned long arg9, unsigned long *out1, - unsigned long *out2,unsigned long *out3, - unsigned long *out4,unsigned long *out5, - unsigned long *out6,unsigned long *out7, - unsigned long *out8,unsigned long *out9) +static long ehea_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) { - long hret; + long ret; int i, sleep_msecs; for (i = 0; i < 5; i++) { - hret = plpar_hcall_9arg_9ret(opcode,arg1, arg2, arg3, arg4, - arg5, arg6, arg7, arg8, arg9, out1, - out2, out3, out4, out5, out6, out7, - out8, out9); - if (H_IS_LONG_BUSY(hret)) { - sleep_msecs = get_longbusy_msecs(hret); + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); msleep_interruptible(sleep_msecs); continue; } - if (hret < H_SUCCESS) - ehea_error("op=%lx hret=%lx " - "i1=%lx i2=%lx i3=%lx i4=%lx i5=%lx i6=%lx " - "i7=%lx i8=%lx i9=%lx " - "o1=%lx o2=%lx o3=%lx o4=%lx o5=%lx o6=%lx " - "o7=%lx o8=%lx o9=%lx", - opcode, hret, arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9, *out1, *out2, *out3, - *out4, *out5, *out6, *out7, *out8, *out9); - return hret; + if (ret < H_SUCCESS) + ehea_error("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx ", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7); + + return ret; } + return H_BUSY; } -u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category, - const u64 qp_handle, const u64 sel_mask, void *cb_addr) +static long ehea_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) { - u64 dummy; + long ret; + int i, sleep_msecs; - if ((((u64)cb_addr) & (PAGE_SIZE - 1)) != 0) { - ehea_error("not on pageboundary"); - return H_PARAMETER; + for (i = 0; i < 5; i++) { + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehea_error("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" + " arg9=%lx" + " out1=%lx out2=%lx out3=%lx out4=%lx" + " out5=%lx out6=%lx out7=%lx out8=%lx" + " out9=%lx", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, + outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + + return ret; } - return ehea_hcall_9arg_9ret(H_QUERY_HEA_QP, - adapter_handle, /* R4 */ - qp_category, /* R5 */ - qp_handle, /* R6 */ - sel_mask, /* R7 */ - virt_to_abs(cb_addr), /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - &dummy, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + return H_BUSY; +} + +u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category, + const u64 qp_handle, const u64 sel_mask, void *cb_addr) +{ + return ehea_plpar_hcall_norets(H_QUERY_HEA_QP, + adapter_handle, /* R4 */ + qp_category, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + virt_to_abs(cb_addr), /* R8 */ + 0, 0); } /* input param R5 */ @@ -180,6 +208,7 @@ u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, u64 *qp_handle, struct h_epas *h_epas) { u64 hret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; u64 allocate_controls = EHEA_BMASK_SET(H_ALL_RES_QP_EQPO, init_attr->low_lat_rq1 ? 1 : 0) @@ -219,45 +248,29 @@ u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ2, init_attr->rq2_threshold) | EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ3, init_attr->rq3_threshold); - u64 r5_out = 0; - u64 r6_out = 0; - u64 r7_out = 0; - u64 r8_out = 0; - u64 r9_out = 0; - u64 g_la_user_out = 0; - u64 r11_out = 0; - u64 r12_out = 0; - - hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, - adapter_handle, /* R4 */ - allocate_controls, /* R5 */ - init_attr->send_cq_handle, /* R6 */ - init_attr->recv_cq_handle, /* R7 */ - init_attr->aff_eq_handle, /* R8 */ - r9_reg, /* R9 */ - max_r10_reg, /* R10 */ - r11_in, /* R11 */ - threshold, /* R12 */ - qp_handle, /* R4 */ - &r5_out, /* R5 */ - &r6_out, /* R6 */ - &r7_out, /* R7 */ - &r8_out, /* R8 */ - &r9_out, /* R9 */ - &g_la_user_out, /* R10 */ - &r11_out, /* R11 */ - &r12_out); /* R12 */ - - init_attr->qp_nr = (u32)r5_out; + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + init_attr->send_cq_handle, /* R6 */ + init_attr->recv_cq_handle, /* R7 */ + init_attr->aff_eq_handle, /* R8 */ + r9_reg, /* R9 */ + max_r10_reg, /* R10 */ + r11_in, /* R11 */ + threshold); /* R12 */ + + *qp_handle = outs[0]; + init_attr->qp_nr = (u32)outs[1]; init_attr->act_nr_send_wqes = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_SWQE, r6_out); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_SWQE, outs[2]); init_attr->act_nr_rwqes_rq1 = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R1WQE, r6_out); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R1WQE, outs[2]); init_attr->act_nr_rwqes_rq2 = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R2WQE, r6_out); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R2WQE, outs[2]); init_attr->act_nr_rwqes_rq3 = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R3WQE, r6_out); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R3WQE, outs[2]); init_attr->act_wqe_size_enc_sq = init_attr->wqe_size_enc_sq; init_attr->act_wqe_size_enc_rq1 = init_attr->wqe_size_enc_rq1; @@ -265,25 +278,25 @@ u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, init_attr->act_wqe_size_enc_rq3 = init_attr->wqe_size_enc_rq3; init_attr->nr_sq_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_SQ, r8_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_SQ, outs[4]); init_attr->nr_rq1_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ1, r8_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ1, outs[4]); init_attr->nr_rq2_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ2, r9_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ2, outs[5]); init_attr->nr_rq3_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ3, r9_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ3, outs[5]); init_attr->liobn_sq = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_SQ, r11_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_SQ, outs[7]); init_attr->liobn_rq1 = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ1, r11_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ1, outs[7]); init_attr->liobn_rq2 = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ2, r12_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ2, outs[8]); init_attr->liobn_rq3 = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ3, r12_out); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ3, outs[8]); if (!hret) - hcp_epas_ctor(h_epas, g_la_user_out, g_la_user_out); + hcp_epas_ctor(h_epas, outs[6], outs[6]); return hret; } @@ -292,31 +305,24 @@ u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, struct ehea_cq_attr *cq_attr, u64 *cq_handle, struct h_epas *epas) { - u64 hret, dummy, act_nr_of_cqes_out, act_pages_out; - u64 g_la_privileged_out, g_la_user_out; - - hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, - adapter_handle, /* R4 */ - H_ALL_RES_TYPE_CQ, /* R5 */ - cq_attr->eq_handle, /* R6 */ - cq_attr->cq_token, /* R7 */ - cq_attr->max_nr_of_cqes, /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - cq_handle, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &act_nr_of_cqes_out, /* R7 */ - &act_pages_out, /* R8 */ - &g_la_privileged_out, /* R9 */ - &g_la_user_out, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ - - cq_attr->act_nr_of_cqes = act_nr_of_cqes_out; - cq_attr->nr_pages = act_pages_out; + u64 hret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + H_ALL_RES_TYPE_CQ, /* R5 */ + cq_attr->eq_handle, /* R6 */ + cq_attr->cq_token, /* R7 */ + cq_attr->max_nr_of_cqes, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + *cq_handle = outs[0]; + cq_attr->act_nr_of_cqes = outs[3]; + cq_attr->nr_pages = outs[4]; if (!hret) - hcp_epas_ctor(epas, g_la_privileged_out, g_la_user_out); + hcp_epas_ctor(epas, outs[5], outs[6]); return hret; } @@ -361,9 +367,8 @@ u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, struct ehea_eq_attr *eq_attr, u64 *eq_handle) { - u64 hret, dummy, eq_liobn, allocate_controls; - u64 ist1_out, ist2_out, ist3_out, ist4_out; - u64 act_nr_of_eqes_out, act_pages_out; + u64 hret, allocate_controls; + u64 outs[PLPAR_HCALL9_BUFSIZE]; /* resource type */ allocate_controls = @@ -372,27 +377,20 @@ u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, | EHEA_BMASK_SET(H_ALL_RES_EQ_INH_EQE_GEN, !eq_attr->eqe_gen) | EHEA_BMASK_SET(H_ALL_RES_EQ_NON_NEQ_ISN, 1); - hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, - adapter_handle, /* R4 */ - allocate_controls, /* R5 */ - eq_attr->max_nr_of_eqes, /* R6 */ - 0, 0, 0, 0, 0, 0, /* R7-R10 */ - eq_handle, /* R4 */ - &dummy, /* R5 */ - &eq_liobn, /* R6 */ - &act_nr_of_eqes_out, /* R7 */ - &act_pages_out, /* R8 */ - &ist1_out, /* R9 */ - &ist2_out, /* R10 */ - &ist3_out, /* R11 */ - &ist4_out); /* R12 */ - - eq_attr->act_nr_of_eqes = act_nr_of_eqes_out; - eq_attr->nr_pages = act_pages_out; - eq_attr->ist1 = ist1_out; - eq_attr->ist2 = ist2_out; - eq_attr->ist3 = ist3_out; - eq_attr->ist4 = ist4_out; + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + eq_attr->max_nr_of_eqes, /* R6 */ + 0, 0, 0, 0, 0, 0); /* R7-R10 */ + + *eq_handle = outs[0]; + eq_attr->act_nr_of_eqes = outs[3]; + eq_attr->nr_pages = outs[4]; + eq_attr->ist1 = outs[5]; + eq_attr->ist2 = outs[6]; + eq_attr->ist3 = outs[7]; + eq_attr->ist4 = outs[8]; return hret; } @@ -402,31 +400,22 @@ u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, const u8 cat, void *cb_addr, u64 *inv_attr_id, u64 *proc_mask, u16 *out_swr, u16 *out_rwr) { - u64 hret, dummy, act_out_swr, act_out_rwr; - - if ((((u64)cb_addr) & (PAGE_SIZE - 1)) != 0) { - ehea_error("not on page boundary"); - return H_PARAMETER; - } - - hret = ehea_hcall_9arg_9ret(H_MODIFY_HEA_QP, - adapter_handle, /* R4 */ - (u64) cat, /* R5 */ - qp_handle, /* R6 */ - sel_mask, /* R7 */ - virt_to_abs(cb_addr), /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - inv_attr_id, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &act_out_swr, /* R7 */ - &act_out_rwr, /* R8 */ - proc_mask, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ - *out_swr = act_out_swr; - *out_rwr = act_out_rwr; + u64 hret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_MODIFY_HEA_QP, + outs, + adapter_handle, /* R4 */ + (u64) cat, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + virt_to_abs(cb_addr), /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + *inv_attr_id = outs[0]; + *out_swr = outs[3]; + *out_rwr = outs[4]; + *proc_mask = outs[5]; return hret; } @@ -435,122 +424,81 @@ u64 ehea_h_register_rpage(const u64 adapter_handle, const u8 pagesize, const u8 queue_type, const u64 resource_handle, const u64 log_pageaddr, u64 count) { - u64 dummy, reg_control; + u64 reg_control; reg_control = EHEA_BMASK_SET(H_REG_RPAGE_PAGE_SIZE, pagesize) | EHEA_BMASK_SET(H_REG_RPAGE_QT, queue_type); - return ehea_hcall_9arg_9ret(H_REGISTER_HEA_RPAGES, - adapter_handle, /* R4 */ - reg_control, /* R5 */ - resource_handle, /* R6 */ - log_pageaddr, /* R7 */ - count, /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - &dummy, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + return ehea_plpar_hcall_norets(H_REGISTER_HEA_RPAGES, + adapter_handle, /* R4 */ + reg_control, /* R5 */ + resource_handle, /* R6 */ + log_pageaddr, /* R7 */ + count, /* R8 */ + 0, 0); /* R9-R10 */ } u64 ehea_h_register_smr(const u64 adapter_handle, const u64 orig_mr_handle, const u64 vaddr_in, const u32 access_ctrl, const u32 pd, struct ehea_mr *mr) { - u64 hret, dummy, lkey_out; - - hret = ehea_hcall_9arg_9ret(H_REGISTER_SMR, - adapter_handle , /* R4 */ - orig_mr_handle, /* R5 */ - vaddr_in, /* R6 */ - (((u64)access_ctrl) << 32ULL), /* R7 */ - pd, /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - &mr->handle, /* R4 */ - &dummy, /* R5 */ - &lkey_out, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ - mr->lkey = (u32)lkey_out; + u64 hret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_REGISTER_SMR, + outs, + adapter_handle , /* R4 */ + orig_mr_handle, /* R5 */ + vaddr_in, /* R6 */ + (((u64)access_ctrl) << 32ULL), /* R7 */ + pd, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + mr->handle = outs[0]; + mr->lkey = (u32)outs[2]; return hret; } u64 ehea_h_disable_and_get_hea(const u64 adapter_handle, const u64 qp_handle) { - u64 hret, dummy, ladr_next_sq_wqe_out; - u64 ladr_next_rq1_wqe_out, ladr_next_rq2_wqe_out, ladr_next_rq3_wqe_out; - - hret = ehea_hcall_9arg_9ret(H_DISABLE_AND_GET_HEA, - adapter_handle, /* R4 */ - H_DISABLE_GET_EHEA_WQE_P, /* R5 */ - qp_handle, /* R6 */ - 0, 0, 0, 0, 0, 0, /* R7-R12 */ - &ladr_next_sq_wqe_out, /* R4 */ - &ladr_next_rq1_wqe_out, /* R5 */ - &ladr_next_rq2_wqe_out, /* R6 */ - &ladr_next_rq3_wqe_out, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ - return hret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + return ehea_plpar_hcall9(H_DISABLE_AND_GET_HEA, + outs, + adapter_handle, /* R4 */ + H_DISABLE_GET_EHEA_WQE_P, /* R5 */ + qp_handle, /* R6 */ + 0, 0, 0, 0, 0, 0); /* R7-R12 */ } u64 ehea_h_free_resource(const u64 adapter_handle, const u64 res_handle) { - u64 dummy; - - return ehea_hcall_9arg_9ret(H_FREE_RESOURCE, - adapter_handle, /* R4 */ - res_handle, /* R5 */ - 0, 0, 0, 0, 0, 0, 0, /* R6-R12 */ - &dummy, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + return ehea_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle, /* R4 */ + res_handle, /* R5 */ + 0, 0, 0, 0, 0); /* R6-R10 */ } u64 ehea_h_alloc_resource_mr(const u64 adapter_handle, const u64 vaddr, const u64 length, const u32 access_ctrl, const u32 pd, u64 *mr_handle, u32 *lkey) { - u64 hret, dummy, lkey_out; - - hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, - adapter_handle, /* R4 */ - 5, /* R5 */ - vaddr, /* R6 */ - length, /* R7 */ - (((u64) access_ctrl) << 32ULL),/* R8 */ - pd, /* R9 */ - 0, 0, 0, /* R10-R12 */ - mr_handle, /* R4 */ - &dummy, /* R5 */ - &lkey_out, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ - *lkey = (u32) lkey_out; - + u64 hret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + 5, /* R5 */ + vaddr, /* R6 */ + length, /* R7 */ + (((u64) access_ctrl) << 32ULL), /* R8 */ + pd, /* R9 */ + 0, 0, 0); /* R10-R12 */ + + *mr_handle = outs[0]; + *lkey = (u32)outs[2]; return hret; } @@ -570,23 +518,14 @@ u64 ehea_h_register_rpage_mr(const u64 adapter_handle, const u64 mr_handle, u64 ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr) { - u64 hret, dummy, cb_logaddr; + u64 hret, cb_logaddr; cb_logaddr = virt_to_abs(cb_addr); - hret = ehea_hcall_9arg_9ret(H_QUERY_HEA, - adapter_handle, /* R4 */ - cb_logaddr, /* R5 */ - 0, 0, 0, 0, 0, 0, 0, /* R6-R12 */ - &dummy, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + hret = ehea_plpar_hcall_norets(H_QUERY_HEA, + adapter_handle, /* R4 */ + cb_logaddr, /* R5 */ + 0, 0, 0, 0, 0); /* R6-R10 */ #ifdef DEBUG ehea_dmp(cb_addr, sizeof(struct hcp_query_ehea), "hcp_query_ehea"); #endif @@ -597,36 +536,28 @@ u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num, const u8 cb_cat, const u64 select_mask, void *cb_addr) { - u64 port_info, dummy; + u64 port_info; u64 cb_logaddr = virt_to_abs(cb_addr); u64 arr_index = 0; port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num); - return ehea_hcall_9arg_9ret(H_QUERY_HEA_PORT, - adapter_handle, /* R4 */ - port_info, /* R5 */ - select_mask, /* R6 */ - arr_index, /* R7 */ - cb_logaddr, /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - &dummy, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + return ehea_plpar_hcall_norets(H_QUERY_HEA_PORT, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0); /* R9-R10 */ } u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, const u8 cb_cat, const u64 select_mask, void *cb_addr) { - u64 port_info, dummy, inv_attr_ident, proc_mask; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + u64 port_info; u64 arr_index = 0; u64 cb_logaddr = virt_to_abs(cb_addr); @@ -635,29 +566,21 @@ u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, #ifdef DEBUG ehea_dump(cb_addr, sizeof(struct hcp_ehea_port_cb0), "Before HCALL"); #endif - return ehea_hcall_9arg_9ret(H_MODIFY_HEA_PORT, - adapter_handle, /* R4 */ - port_info, /* R5 */ - select_mask, /* R6 */ - arr_index, /* R7 */ - cb_logaddr, /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - &inv_attr_ident, /* R4 */ - &proc_mask, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + return ehea_plpar_hcall9(H_MODIFY_HEA_PORT, + outs, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ } u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, const u8 reg_type, const u64 mc_mac_addr, const u16 vlan_id, const u32 hcall_id) { - u64 r5_port_num, r6_reg_type, r7_mc_mac_addr, r8_vlan_id, dummy; + u64 r5_port_num, r6_reg_type, r7_mc_mac_addr, r8_vlan_id; u64 mac_addr = mc_mac_addr >> 16; r5_port_num = EHEA_BMASK_SET(H_REGBCMC_PN, port_num); @@ -665,41 +588,21 @@ u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, r7_mc_mac_addr = EHEA_BMASK_SET(H_REGBCMC_MACADDR, mac_addr); r8_vlan_id = EHEA_BMASK_SET(H_REGBCMC_VLANID, vlan_id); - return ehea_hcall_9arg_9ret(hcall_id, - adapter_handle, /* R4 */ - r5_port_num, /* R5 */ - r6_reg_type, /* R6 */ - r7_mc_mac_addr, /* R7 */ - r8_vlan_id, /* R8 */ - 0, 0, 0, 0, /* R9-R12 */ - &dummy, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + return ehea_plpar_hcall_norets(hcall_id, + adapter_handle, /* R4 */ + r5_port_num, /* R5 */ + r6_reg_type, /* R6 */ + r7_mc_mac_addr, /* R7 */ + r8_vlan_id, /* R8 */ + 0, 0); /* R9-R12 */ } u64 ehea_h_reset_events(const u64 adapter_handle, const u64 neq_handle, const u64 event_mask) { - u64 dummy; - - return ehea_hcall_9arg_9ret(H_RESET_EVENTS, - adapter_handle, /* R4 */ - neq_handle, /* R5 */ - event_mask, /* R6 */ - 0, 0, 0, 0, 0, 0, /* R7-R12 */ - &dummy, /* R4 */ - &dummy, /* R5 */ - &dummy, /* R6 */ - &dummy, /* R7 */ - &dummy, /* R8 */ - &dummy, /* R9 */ - &dummy, /* R10 */ - &dummy, /* R11 */ - &dummy); /* R12 */ + return ehea_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle, /* R4 */ + neq_handle, /* R5 */ + event_mask, /* R6 */ + 0, 0, 0, 0); /* R7-R12 */ } diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c index 8cc3c331aca8..b7b8bc2a6307 100644 --- a/drivers/net/eth16i.c +++ b/drivers/net/eth16i.c @@ -162,9 +162,9 @@ static char *version = #include <linux/skbuff.h> #include <linux/bitops.h> #include <linux/jiffies.h> +#include <linux/io.h> #include <asm/system.h> -#include <asm/io.h> #include <asm/dma.h> diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 99b7a411db28..c5ed635bce36 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -2497,6 +2497,7 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data) u8 __iomem *base = get_hwbase(dev); u32 events; int i; + unsigned long flags; dprintk(KERN_DEBUG "%s: nv_nic_irq_tx\n", dev->name); @@ -2508,16 +2509,16 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data) if (!(events & np->irqmask)) break; - spin_lock_irq(&np->lock); + spin_lock_irqsave(&np->lock, flags); nv_tx_done(dev); - spin_unlock_irq(&np->lock); + spin_unlock_irqrestore(&np->lock, flags); if (events & (NVREG_IRQ_TX_ERR)) { dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n", dev->name, events); } if (i > max_interrupt_work) { - spin_lock_irq(&np->lock); + spin_lock_irqsave(&np->lock, flags); /* disable interrupts on the nic */ writel(NVREG_IRQ_TX_ALL, base + NvRegIrqMask); pci_push(base); @@ -2527,7 +2528,7 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data) mod_timer(&np->nic_poll, jiffies + POLL_WAIT); } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_tx.\n", dev->name, i); - spin_unlock_irq(&np->lock); + spin_unlock_irqrestore(&np->lock, flags); break; } @@ -2601,6 +2602,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data) u8 __iomem *base = get_hwbase(dev); u32 events; int i; + unsigned long flags; dprintk(KERN_DEBUG "%s: nv_nic_irq_rx\n", dev->name); @@ -2614,14 +2616,14 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data) nv_rx_process(dev, dev->weight); if (nv_alloc_rx(dev)) { - spin_lock_irq(&np->lock); + spin_lock_irqsave(&np->lock, flags); if (!np->in_shutdown) mod_timer(&np->oom_kick, jiffies + OOM_REFILL); - spin_unlock_irq(&np->lock); + spin_unlock_irqrestore(&np->lock, flags); } if (i > max_interrupt_work) { - spin_lock_irq(&np->lock); + spin_lock_irqsave(&np->lock, flags); /* disable interrupts on the nic */ writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask); pci_push(base); @@ -2631,7 +2633,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data) mod_timer(&np->nic_poll, jiffies + POLL_WAIT); } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_rx.\n", dev->name, i); - spin_unlock_irq(&np->lock); + spin_unlock_irqrestore(&np->lock, flags); break; } } @@ -2648,6 +2650,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) u8 __iomem *base = get_hwbase(dev); u32 events; int i; + unsigned long flags; dprintk(KERN_DEBUG "%s: nv_nic_irq_other\n", dev->name); @@ -2660,14 +2663,14 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) break; if (events & NVREG_IRQ_LINK) { - spin_lock_irq(&np->lock); + spin_lock_irqsave(&np->lock, flags); nv_link_irq(dev); - spin_unlock_irq(&np->lock); + spin_unlock_irqrestore(&np->lock, flags); } if (np->need_linktimer && time_after(jiffies, np->link_timeout)) { - spin_lock_irq(&np->lock); + spin_lock_irqsave(&np->lock, flags); nv_linkchange(dev); - spin_unlock_irq(&np->lock); + spin_unlock_irqrestore(&np->lock, flags); np->link_timeout = jiffies + LINK_TIMEOUT; } if (events & (NVREG_IRQ_UNKNOWN)) { @@ -2675,7 +2678,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) dev->name, events); } if (i > max_interrupt_work) { - spin_lock_irq(&np->lock); + spin_lock_irqsave(&np->lock, flags); /* disable interrupts on the nic */ writel(NVREG_IRQ_OTHER, base + NvRegIrqMask); pci_push(base); @@ -2685,7 +2688,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) mod_timer(&np->nic_poll, jiffies + POLL_WAIT); } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i); - spin_unlock_irq(&np->lock); + spin_unlock_irqrestore(&np->lock, flags); break; } diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 4bac3cd8f235..2802db23d3cb 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -213,6 +213,7 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc } free_index = pool->consumer_index++ % pool->size; + pool->consumer_index = free_index; index = pool->free_map[free_index]; ibmveth_assert(index != IBM_VETH_INVALID_MAP); @@ -238,7 +239,10 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc if(lpar_rc != H_SUCCESS) { pool->free_map[free_index] = index; pool->skbuff[index] = NULL; - pool->consumer_index--; + if (pool->consumer_index == 0) + pool->consumer_index = pool->size - 1; + else + pool->consumer_index--; dma_unmap_single(&adapter->vdev->dev, pool->dma_addr[index], pool->buff_size, DMA_FROM_DEVICE); @@ -326,6 +330,7 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, u64 DMA_FROM_DEVICE); free_index = adapter->rx_buff_pool[pool].producer_index++ % adapter->rx_buff_pool[pool].size; + adapter->rx_buff_pool[pool].producer_index = free_index; adapter->rx_buff_pool[pool].free_map[free_index] = index; mb(); @@ -437,6 +442,31 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) &adapter->rx_buff_pool[i]); } +static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, + union ibmveth_buf_desc rxq_desc, u64 mac_address) +{ + int rc, try_again = 1; + + /* After a kexec the adapter will still be open, so our attempt to + * open it will fail. So if we get a failure we free the adapter and + * try again, but only once. */ +retry: + rc = h_register_logical_lan(adapter->vdev->unit_address, + adapter->buffer_list_dma, rxq_desc.desc, + adapter->filter_list_dma, mac_address); + + if (rc != H_SUCCESS && try_again) { + do { + rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY)); + + try_again = 0; + goto retry; + } + + return rc; +} + static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev->priv; @@ -502,12 +532,9 @@ static int ibmveth_open(struct net_device *netdev) ibmveth_debug_printk("filter list @ 0x%p\n", adapter->filter_list_addr); ibmveth_debug_printk("receive q @ 0x%p\n", adapter->rx_queue.queue_addr); + h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - lpar_rc = h_register_logical_lan(adapter->vdev->unit_address, - adapter->buffer_list_dma, - rxq_desc.desc, - adapter->filter_list_dma, - mac_address); + lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address); if(lpar_rc != H_SUCCESS) { ibmveth_error_printk("h_register_logical_lan failed with %ld\n", lpar_rc); @@ -905,6 +932,14 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ibmveth_poll_controller(struct net_device *dev) +{ + ibmveth_replenish_task(dev->priv); + ibmveth_interrupt(dev->irq, dev); +} +#endif + static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) { int rc, i; @@ -977,6 +1012,9 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_ netdev->ethtool_ops = &netdev_ethtool_ops; netdev->change_mtu = ibmveth_change_mtu; SET_NETDEV_DEV(netdev, &dev->dev); +#ifdef CONFIG_NET_POLL_CONTROLLER + netdev->poll_controller = ibmveth_poll_controller; +#endif netdev->features |= NETIF_F_LLTX; spin_lock_init(&adapter->stats_lock); @@ -1132,7 +1170,9 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter) { struct proc_dir_entry *entry; if (ibmveth_proc_dir) { - entry = create_proc_entry(adapter->netdev->name, S_IFREG, ibmveth_proc_dir); + char u_addr[10]; + sprintf(u_addr, "%x", adapter->vdev->unit_address); + entry = create_proc_entry(u_addr, S_IFREG, ibmveth_proc_dir); if (!entry) { ibmveth_error_printk("Cannot create adapter proc entry"); } else { @@ -1147,7 +1187,9 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter) static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter) { if (ibmveth_proc_dir) { - remove_proc_entry(adapter->netdev->name, ibmveth_proc_dir); + char u_addr[10]; + sprintf(u_addr, "%x", adapter->vdev->unit_address); + remove_proc_entry(u_addr, ibmveth_proc_dir); } } diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index 971e2dee1e6b..cebf8c374bc5 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -662,19 +662,13 @@ static int ali_ircc_read_dongle_id (int i, chipio_t *info) */ static irqreturn_t ali_ircc_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct ali_ircc_cb *self; int ret; IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__); - if (!dev) { - IRDA_WARNING("%s: irq %d for unknown device.\n", - ALI_IRCC_DRIVER_NAME, irq); - return IRQ_NONE; - } - - self = (struct ali_ircc_cb *) dev->priv; + self = dev->priv; spin_lock(&self->lock); diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c index 7a9128181e68..16620bd97fbf 100644 --- a/drivers/net/irda/donauboe.c +++ b/drivers/net/irda/donauboe.c @@ -657,12 +657,6 @@ toshoboe_makemttpacket (struct toshoboe_cb *self, void *buf, int mtt) return xbofs; } -static int toshoboe_invalid_dev(int irq) -{ - printk (KERN_WARNING DRIVER_NAME ": irq %d for unknown device.\n", irq); - return 1; -} - #ifdef USE_PROBE /***********************************************************************/ /* Probe code */ @@ -711,12 +705,9 @@ stuff_byte (__u8 byte, __u8 * buf) static irqreturn_t toshoboe_probeinterrupt (int irq, void *dev_id) { - struct toshoboe_cb *self = (struct toshoboe_cb *) dev_id; + struct toshoboe_cb *self = dev_id; __u8 irqstat; - if (self == NULL && toshoboe_invalid_dev(irq)) - return IRQ_NONE; - irqstat = INB (OBOE_ISR); /* was it us */ @@ -1163,13 +1154,10 @@ dumpbufs(skb->data,skb->len,'>'); static irqreturn_t toshoboe_interrupt (int irq, void *dev_id) { - struct toshoboe_cb *self = (struct toshoboe_cb *) dev_id; + struct toshoboe_cb *self = dev_id; __u8 irqstat; struct sk_buff *skb = NULL; - if (self == NULL && toshoboe_invalid_dev(irq)) - return IRQ_NONE; - irqstat = INB (OBOE_ISR); /* was it us */ @@ -1357,13 +1345,11 @@ toshoboe_net_open (struct net_device *dev) { struct toshoboe_cb *self; unsigned long flags; + int rc; IRDA_DEBUG (4, "%s()\n", __FUNCTION__); - IRDA_ASSERT (dev != NULL, return -1; ); - self = (struct toshoboe_cb *) dev->priv; - - IRDA_ASSERT (self != NULL, return 0; ); + self = netdev_priv(dev); if (self->async) return -EBUSY; @@ -1371,11 +1357,10 @@ toshoboe_net_open (struct net_device *dev) if (self->stopped) return 0; - if (request_irq (self->io.irq, toshoboe_interrupt, - IRQF_SHARED | IRQF_DISABLED, dev->name, (void *) self)) - { - return -EAGAIN; - } + rc = request_irq (self->io.irq, toshoboe_interrupt, + IRQF_SHARED | IRQF_DISABLED, dev->name, self); + if (rc) + return rc; spin_lock_irqsave(&self->spinlock, flags); toshoboe_startchip (self); diff --git a/drivers/net/irda/irport.c b/drivers/net/irda/irport.c index 6ea78ececca7..654a68b490ae 100644 --- a/drivers/net/irda/irport.c +++ b/drivers/net/irda/irport.c @@ -766,18 +766,14 @@ static inline void irport_receive(struct irport_cb *self) */ static irqreturn_t irport_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct irport_cb *self; int boguscount = 0; int iobase; int iir, lsr; int handled = 0; - if (!dev) { - IRDA_WARNING("%s() irq %d for unknown device.\n", __FUNCTION__, irq); - return IRQ_NONE; - } - self = (struct irport_cb *) dev->priv; + self = dev->priv; spin_lock(&self->lock); diff --git a/drivers/net/irda/irport.h b/drivers/net/irda/irport.h index 4393168347e3..3f46b84c6c85 100644 --- a/drivers/net/irda/irport.h +++ b/drivers/net/irda/irport.h @@ -74,7 +74,7 @@ struct irport_cb { /* For piggyback drivers */ void *priv; void (*change_speed)(void *priv, __u32 speed); - int (*interrupt)(int irq, void *dev_id); + irqreturn_t (*interrupt)(int irq, void *dev_id); }; #endif /* IRPORT_H */ diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c index ea12e999814a..29b5ccd29d0b 100644 --- a/drivers/net/irda/nsc-ircc.c +++ b/drivers/net/irda/nsc-ircc.c @@ -2068,17 +2068,12 @@ static void nsc_ircc_fir_interrupt(struct nsc_ircc_cb *self, int iobase, */ static irqreturn_t nsc_ircc_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct nsc_ircc_cb *self; __u8 bsr, eir; int iobase; - if (!dev) { - IRDA_WARNING("%s: irq %d for unknown device.\n", - driver_name, irq); - return IRQ_NONE; - } - self = (struct nsc_ircc_cb *) dev->priv; + self = dev->priv; spin_lock(&self->lock); diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c index b4fb92a7baa8..4212657fa4f9 100644 --- a/drivers/net/irda/w83977af_ir.c +++ b/drivers/net/irda/w83977af_ir.c @@ -1113,17 +1113,12 @@ static __u8 w83977af_fir_interrupt(struct w83977af_ir *self, int isr) */ static irqreturn_t w83977af_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct w83977af_ir *self; __u8 set, icr, isr; int iobase; - if (!dev) { - printk(KERN_WARNING "%s: irq %d for unknown device.\n", - driver_name, irq); - return IRQ_NONE; - } - self = (struct w83977af_ir *) dev->priv; + self = dev->priv; iobase = self->io.fir_base; diff --git a/drivers/net/lance.c b/drivers/net/lance.c index 7afac47e59ad..6efbd499d752 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -1019,11 +1019,6 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id) int csr0, ioaddr, boguscnt=10; int must_restart; - if (dev == NULL) { - printk ("lance_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } - ioaddr = dev->base_addr; lp = dev->priv; diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c index 8cbd940f0ac2..f4d815bca643 100644 --- a/drivers/net/lasi_82596.c +++ b/drivers/net/lasi_82596.c @@ -1252,7 +1252,7 @@ static int __devinit i82596_probe(struct net_device *dev, static void i596_poll_controller(struct net_device *dev) { disable_irq(dev->irq); - i596_interrupt(dev->irq, dev, NULL); + i596_interrupt(dev->irq, dev); enable_irq(dev->irq); } #endif diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index d26a819a9735..9997081c6dae 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1251,7 +1251,7 @@ static void mv643xx_netpoll(struct net_device *netdev) /* wait for previous write to complete */ mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num)); - mv643xx_eth_int_handler(netdev->irq, netdev, NULL); + mv643xx_eth_int_handler(netdev->irq, netdev); mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), ETH_INT_UNMASK_ALL); } @@ -2155,7 +2155,7 @@ static void eth_update_mib_counters(struct mv643xx_private *mp) for (offset = ETH_MIB_BAD_OCTETS_RECEIVED; offset <= ETH_MIB_FRAMES_1024_TO_MAX_OCTETS; offset += 4) - *(u32 *)((char *)p + offset) = read_mib(mp, offset); + *(u32 *)((char *)p + offset) += read_mib(mp, offset); p->good_octets_sent += read_mib(mp, ETH_MIB_GOOD_OCTETS_SENT_LOW); p->good_octets_sent += @@ -2164,7 +2164,7 @@ static void eth_update_mib_counters(struct mv643xx_private *mp) for (offset = ETH_MIB_GOOD_FRAMES_SENT; offset <= ETH_MIB_LATE_COLLISION; offset += 4) - *(u32 *)((char *)p + offset) = read_mib(mp, offset); + *(u32 *)((char *)p + offset) += read_mib(mp, offset); } /* diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index e5f366910914..5ddd5742f779 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -1201,14 +1201,8 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id) struct ei_device *ei_local; int handled = 0; - if (dev == NULL) - { - printk ("net_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } - e8390_base = dev->base_addr; - ei_local = (struct ei_device *) netdev_priv(dev); + ei_local = netdev_priv(dev); /* * Protect the irq test too. diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index c73e2f210774..36f9d988278f 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -2569,13 +2569,6 @@ pcnet32_interrupt(int irq, void *dev_id) u16 csr0; int boguscnt = max_interrupt_work; - if (!dev) { - if (pcnet32_debug & NETIF_MSG_INTR) - printk(KERN_DEBUG "%s(): irq %d for unknown device\n", - __FUNCTION__, irq); - return IRQ_NONE; - } - ioaddr = dev->base_addr; lp = dev->priv; diff --git a/drivers/net/plip.c b/drivers/net/plip.c index c0b333d2917a..71afb274498f 100644 --- a/drivers/net/plip.c +++ b/drivers/net/plip.c @@ -909,11 +909,6 @@ plip_interrupt(int irq, void *dev_id) struct plip_local *rcv; unsigned char c0; - if (dev == NULL) { - printk(KERN_DEBUG "plip_interrupt: irq %d for unknown device.\n", irq); - return; - } - nl = netdev_priv(dev); rcv = &nl->rcv_data; diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 72f52dc98f53..12b719f4d00f 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -116,179 +116,179 @@ typedef struct { /* The statistics block of Xena */ typedef struct stat_block { /* Tx MAC statistics counters. */ - u32 tmac_data_octets; - u32 tmac_frms; - u64 tmac_drop_frms; - u32 tmac_bcst_frms; - u32 tmac_mcst_frms; - u64 tmac_pause_ctrl_frms; - u32 tmac_ucst_frms; - u32 tmac_ttl_octets; - u32 tmac_any_err_frms; - u32 tmac_nucst_frms; - u64 tmac_ttl_less_fb_octets; - u64 tmac_vld_ip_octets; - u32 tmac_drop_ip; - u32 tmac_vld_ip; - u32 tmac_rst_tcp; - u32 tmac_icmp; - u64 tmac_tcp; - u32 reserved_0; - u32 tmac_udp; + __le32 tmac_data_octets; + __le32 tmac_frms; + __le64 tmac_drop_frms; + __le32 tmac_bcst_frms; + __le32 tmac_mcst_frms; + __le64 tmac_pause_ctrl_frms; + __le32 tmac_ucst_frms; + __le32 tmac_ttl_octets; + __le32 tmac_any_err_frms; + __le32 tmac_nucst_frms; + __le64 tmac_ttl_less_fb_octets; + __le64 tmac_vld_ip_octets; + __le32 tmac_drop_ip; + __le32 tmac_vld_ip; + __le32 tmac_rst_tcp; + __le32 tmac_icmp; + __le64 tmac_tcp; + __le32 reserved_0; + __le32 tmac_udp; /* Rx MAC Statistics counters. */ - u32 rmac_data_octets; - u32 rmac_vld_frms; - u64 rmac_fcs_err_frms; - u64 rmac_drop_frms; - u32 rmac_vld_bcst_frms; - u32 rmac_vld_mcst_frms; - u32 rmac_out_rng_len_err_frms; - u32 rmac_in_rng_len_err_frms; - u64 rmac_long_frms; - u64 rmac_pause_ctrl_frms; - u64 rmac_unsup_ctrl_frms; - u32 rmac_accepted_ucst_frms; - u32 rmac_ttl_octets; - u32 rmac_discarded_frms; - u32 rmac_accepted_nucst_frms; - u32 reserved_1; - u32 rmac_drop_events; - u64 rmac_ttl_less_fb_octets; - u64 rmac_ttl_frms; - u64 reserved_2; - u32 rmac_usized_frms; - u32 reserved_3; - u32 rmac_frag_frms; - u32 rmac_osized_frms; - u32 reserved_4; - u32 rmac_jabber_frms; - u64 rmac_ttl_64_frms; - u64 rmac_ttl_65_127_frms; - u64 reserved_5; - u64 rmac_ttl_128_255_frms; - u64 rmac_ttl_256_511_frms; - u64 reserved_6; - u64 rmac_ttl_512_1023_frms; - u64 rmac_ttl_1024_1518_frms; - u32 rmac_ip; - u32 reserved_7; - u64 rmac_ip_octets; - u32 rmac_drop_ip; - u32 rmac_hdr_err_ip; - u32 reserved_8; - u32 rmac_icmp; - u64 rmac_tcp; - u32 rmac_err_drp_udp; - u32 rmac_udp; - u64 rmac_xgmii_err_sym; - u64 rmac_frms_q0; - u64 rmac_frms_q1; - u64 rmac_frms_q2; - u64 rmac_frms_q3; - u64 rmac_frms_q4; - u64 rmac_frms_q5; - u64 rmac_frms_q6; - u64 rmac_frms_q7; - u16 rmac_full_q3; - u16 rmac_full_q2; - u16 rmac_full_q1; - u16 rmac_full_q0; - u16 rmac_full_q7; - u16 rmac_full_q6; - u16 rmac_full_q5; - u16 rmac_full_q4; - u32 reserved_9; - u32 rmac_pause_cnt; - u64 rmac_xgmii_data_err_cnt; - u64 rmac_xgmii_ctrl_err_cnt; - u32 rmac_err_tcp; - u32 rmac_accepted_ip; + __le32 rmac_data_octets; + __le32 rmac_vld_frms; + __le64 rmac_fcs_err_frms; + __le64 rmac_drop_frms; + __le32 rmac_vld_bcst_frms; + __le32 rmac_vld_mcst_frms; + __le32 rmac_out_rng_len_err_frms; + __le32 rmac_in_rng_len_err_frms; + __le64 rmac_long_frms; + __le64 rmac_pause_ctrl_frms; + __le64 rmac_unsup_ctrl_frms; + __le32 rmac_accepted_ucst_frms; + __le32 rmac_ttl_octets; + __le32 rmac_discarded_frms; + __le32 rmac_accepted_nucst_frms; + __le32 reserved_1; + __le32 rmac_drop_events; + __le64 rmac_ttl_less_fb_octets; + __le64 rmac_ttl_frms; + __le64 reserved_2; + __le32 rmac_usized_frms; + __le32 reserved_3; + __le32 rmac_frag_frms; + __le32 rmac_osized_frms; + __le32 reserved_4; + __le32 rmac_jabber_frms; + __le64 rmac_ttl_64_frms; + __le64 rmac_ttl_65_127_frms; + __le64 reserved_5; + __le64 rmac_ttl_128_255_frms; + __le64 rmac_ttl_256_511_frms; + __le64 reserved_6; + __le64 rmac_ttl_512_1023_frms; + __le64 rmac_ttl_1024_1518_frms; + __le32 rmac_ip; + __le32 reserved_7; + __le64 rmac_ip_octets; + __le32 rmac_drop_ip; + __le32 rmac_hdr_err_ip; + __le32 reserved_8; + __le32 rmac_icmp; + __le64 rmac_tcp; + __le32 rmac_err_drp_udp; + __le32 rmac_udp; + __le64 rmac_xgmii_err_sym; + __le64 rmac_frms_q0; + __le64 rmac_frms_q1; + __le64 rmac_frms_q2; + __le64 rmac_frms_q3; + __le64 rmac_frms_q4; + __le64 rmac_frms_q5; + __le64 rmac_frms_q6; + __le64 rmac_frms_q7; + __le16 rmac_full_q3; + __le16 rmac_full_q2; + __le16 rmac_full_q1; + __le16 rmac_full_q0; + __le16 rmac_full_q7; + __le16 rmac_full_q6; + __le16 rmac_full_q5; + __le16 rmac_full_q4; + __le32 reserved_9; + __le32 rmac_pause_cnt; + __le64 rmac_xgmii_data_err_cnt; + __le64 rmac_xgmii_ctrl_err_cnt; + __le32 rmac_err_tcp; + __le32 rmac_accepted_ip; /* PCI/PCI-X Read transaction statistics. */ - u32 new_rd_req_cnt; - u32 rd_req_cnt; - u32 rd_rtry_cnt; - u32 new_rd_req_rtry_cnt; + __le32 new_rd_req_cnt; + __le32 rd_req_cnt; + __le32 rd_rtry_cnt; + __le32 new_rd_req_rtry_cnt; /* PCI/PCI-X Write/Read transaction statistics. */ - u32 wr_req_cnt; - u32 wr_rtry_rd_ack_cnt; - u32 new_wr_req_rtry_cnt; - u32 new_wr_req_cnt; - u32 wr_disc_cnt; - u32 wr_rtry_cnt; + __le32 wr_req_cnt; + __le32 wr_rtry_rd_ack_cnt; + __le32 new_wr_req_rtry_cnt; + __le32 new_wr_req_cnt; + __le32 wr_disc_cnt; + __le32 wr_rtry_cnt; /* PCI/PCI-X Write / DMA Transaction statistics. */ - u32 txp_wr_cnt; - u32 rd_rtry_wr_ack_cnt; - u32 txd_wr_cnt; - u32 txd_rd_cnt; - u32 rxd_wr_cnt; - u32 rxd_rd_cnt; - u32 rxf_wr_cnt; - u32 txf_rd_cnt; + __le32 txp_wr_cnt; + __le32 rd_rtry_wr_ack_cnt; + __le32 txd_wr_cnt; + __le32 txd_rd_cnt; + __le32 rxd_wr_cnt; + __le32 rxd_rd_cnt; + __le32 rxf_wr_cnt; + __le32 txf_rd_cnt; /* Tx MAC statistics overflow counters. */ - u32 tmac_data_octets_oflow; - u32 tmac_frms_oflow; - u32 tmac_bcst_frms_oflow; - u32 tmac_mcst_frms_oflow; - u32 tmac_ucst_frms_oflow; - u32 tmac_ttl_octets_oflow; - u32 tmac_any_err_frms_oflow; - u32 tmac_nucst_frms_oflow; - u64 tmac_vlan_frms; - u32 tmac_drop_ip_oflow; - u32 tmac_vld_ip_oflow; - u32 tmac_rst_tcp_oflow; - u32 tmac_icmp_oflow; - u32 tpa_unknown_protocol; - u32 tmac_udp_oflow; - u32 reserved_10; - u32 tpa_parse_failure; + __le32 tmac_data_octets_oflow; + __le32 tmac_frms_oflow; + __le32 tmac_bcst_frms_oflow; + __le32 tmac_mcst_frms_oflow; + __le32 tmac_ucst_frms_oflow; + __le32 tmac_ttl_octets_oflow; + __le32 tmac_any_err_frms_oflow; + __le32 tmac_nucst_frms_oflow; + __le64 tmac_vlan_frms; + __le32 tmac_drop_ip_oflow; + __le32 tmac_vld_ip_oflow; + __le32 tmac_rst_tcp_oflow; + __le32 tmac_icmp_oflow; + __le32 tpa_unknown_protocol; + __le32 tmac_udp_oflow; + __le32 reserved_10; + __le32 tpa_parse_failure; /* Rx MAC Statistics overflow counters. */ - u32 rmac_data_octets_oflow; - u32 rmac_vld_frms_oflow; - u32 rmac_vld_bcst_frms_oflow; - u32 rmac_vld_mcst_frms_oflow; - u32 rmac_accepted_ucst_frms_oflow; - u32 rmac_ttl_octets_oflow; - u32 rmac_discarded_frms_oflow; - u32 rmac_accepted_nucst_frms_oflow; - u32 rmac_usized_frms_oflow; - u32 rmac_drop_events_oflow; - u32 rmac_frag_frms_oflow; - u32 rmac_osized_frms_oflow; - u32 rmac_ip_oflow; - u32 rmac_jabber_frms_oflow; - u32 rmac_icmp_oflow; - u32 rmac_drop_ip_oflow; - u32 rmac_err_drp_udp_oflow; - u32 rmac_udp_oflow; - u32 reserved_11; - u32 rmac_pause_cnt_oflow; - u64 rmac_ttl_1519_4095_frms; - u64 rmac_ttl_4096_8191_frms; - u64 rmac_ttl_8192_max_frms; - u64 rmac_ttl_gt_max_frms; - u64 rmac_osized_alt_frms; - u64 rmac_jabber_alt_frms; - u64 rmac_gt_max_alt_frms; - u64 rmac_vlan_frms; - u32 rmac_len_discard; - u32 rmac_fcs_discard; - u32 rmac_pf_discard; - u32 rmac_da_discard; - u32 rmac_red_discard; - u32 rmac_rts_discard; - u32 reserved_12; - u32 rmac_ingm_full_discard; - u32 reserved_13; - u32 rmac_accepted_ip_oflow; - u32 reserved_14; - u32 link_fault_cnt; + __le32 rmac_data_octets_oflow; + __le32 rmac_vld_frms_oflow; + __le32 rmac_vld_bcst_frms_oflow; + __le32 rmac_vld_mcst_frms_oflow; + __le32 rmac_accepted_ucst_frms_oflow; + __le32 rmac_ttl_octets_oflow; + __le32 rmac_discarded_frms_oflow; + __le32 rmac_accepted_nucst_frms_oflow; + __le32 rmac_usized_frms_oflow; + __le32 rmac_drop_events_oflow; + __le32 rmac_frag_frms_oflow; + __le32 rmac_osized_frms_oflow; + __le32 rmac_ip_oflow; + __le32 rmac_jabber_frms_oflow; + __le32 rmac_icmp_oflow; + __le32 rmac_drop_ip_oflow; + __le32 rmac_err_drp_udp_oflow; + __le32 rmac_udp_oflow; + __le32 reserved_11; + __le32 rmac_pause_cnt_oflow; + __le64 rmac_ttl_1519_4095_frms; + __le64 rmac_ttl_4096_8191_frms; + __le64 rmac_ttl_8192_max_frms; + __le64 rmac_ttl_gt_max_frms; + __le64 rmac_osized_alt_frms; + __le64 rmac_jabber_alt_frms; + __le64 rmac_gt_max_alt_frms; + __le64 rmac_vlan_frms; + __le32 rmac_len_discard; + __le32 rmac_fcs_discard; + __le32 rmac_pf_discard; + __le32 rmac_da_discard; + __le32 rmac_red_discard; + __le32 rmac_rts_discard; + __le32 reserved_12; + __le32 rmac_ingm_full_discard; + __le32 reserved_13; + __le32 rmac_accepted_ip_oflow; + __le32 reserved_14; + __le32 link_fault_cnt; u8 buffer[20]; swStat_t sw_stat; xpakStat_t xpak_stat; diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c index c9efad8a917e..b269513cde45 100644 --- a/drivers/net/saa9730.c +++ b/drivers/net/saa9730.c @@ -747,7 +747,7 @@ static int lan_saa9730_rx(struct net_device *dev) static irqreturn_t lan_saa9730_interrupt(const int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct lan_saa9730_private *lp = netdev_priv(dev); if (lan_saa9730_debug > 5) diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index dc30dee5537f..b9fa4fbb1398 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -1084,19 +1084,13 @@ static irqreturn_t sb1000_interrupt(int irq, void *dev_id) char *name; unsigned char st; int ioaddr[2]; - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct sb1000_private *lp = netdev_priv(dev); const unsigned char Command0[6] = {0x80, 0x2c, 0x00, 0x00, 0x00, 0x00}; const unsigned char Command1[6] = {0x80, 0x2e, 0x00, 0x00, 0x00, 0x00}; const int MaxRxErrorCount = 6; - if (dev == NULL) { - printk(KERN_ERR "sb1000_interrupt(): irq %d for unknown device.\n", - irq); - return IRQ_NONE; - } - ioaddr[0] = dev->base_addr; /* mem_start holds the second I/O address */ ioaddr[1] = dev->mem_start; diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 06ea2626c6f4..9733a11c6146 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -616,15 +616,10 @@ static int skfp_close(struct net_device *dev) irqreturn_t skfp_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct s_smc *smc; /* private board structure pointer */ skfddi_priv *bp; - if (dev == NULL) { - printk("%s: irq %d for unknown device\n", dev->name, irq); - return IRQ_NONE; - } - smc = netdev_priv(dev); bp = &smc->os; diff --git a/drivers/net/skge.c b/drivers/net/skge.c index a4a58e4e93a1..e7e414928f89 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -43,7 +43,7 @@ #include "skge.h" #define DRV_NAME "skge" -#define DRV_VERSION "1.8" +#define DRV_VERSION "1.9" #define PFX DRV_NAME " " #define DEFAULT_TX_RING_SIZE 128 @@ -197,8 +197,8 @@ static u32 skge_supported_modes(const struct skge_hw *hw) else if (hw->chip_id == CHIP_ID_YUKON) supported &= ~SUPPORTED_1000baseT_Half; } else - supported = SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE - | SUPPORTED_Autoneg; + supported = SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half + | SUPPORTED_FIBRE | SUPPORTED_Autoneg; return supported; } @@ -487,31 +487,37 @@ static void skge_get_pauseparam(struct net_device *dev, { struct skge_port *skge = netdev_priv(dev); - ecmd->tx_pause = (skge->flow_control == FLOW_MODE_LOC_SEND) - || (skge->flow_control == FLOW_MODE_SYMMETRIC); - ecmd->rx_pause = (skge->flow_control == FLOW_MODE_REM_SEND) - || (skge->flow_control == FLOW_MODE_SYMMETRIC); + ecmd->rx_pause = (skge->flow_control == FLOW_MODE_SYMMETRIC) + || (skge->flow_control == FLOW_MODE_SYM_OR_REM); + ecmd->tx_pause = ecmd->rx_pause || (skge->flow_control == FLOW_MODE_LOC_SEND); - ecmd->autoneg = skge->autoneg; + ecmd->autoneg = ecmd->rx_pause || ecmd->tx_pause; } static int skge_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *ecmd) { struct skge_port *skge = netdev_priv(dev); + struct ethtool_pauseparam old; - skge->autoneg = ecmd->autoneg; - if (ecmd->rx_pause && ecmd->tx_pause) - skge->flow_control = FLOW_MODE_SYMMETRIC; - else if (ecmd->rx_pause && !ecmd->tx_pause) - skge->flow_control = FLOW_MODE_REM_SEND; - else if (!ecmd->rx_pause && ecmd->tx_pause) - skge->flow_control = FLOW_MODE_LOC_SEND; - else - skge->flow_control = FLOW_MODE_NONE; + skge_get_pauseparam(dev, &old); + + if (ecmd->autoneg != old.autoneg) + skge->flow_control = ecmd->autoneg ? FLOW_MODE_NONE : FLOW_MODE_SYMMETRIC; + else { + if (ecmd->rx_pause && ecmd->tx_pause) + skge->flow_control = FLOW_MODE_SYMMETRIC; + else if (ecmd->rx_pause && !ecmd->tx_pause) + skge->flow_control = FLOW_MODE_SYM_OR_REM; + else if (!ecmd->rx_pause && ecmd->tx_pause) + skge->flow_control = FLOW_MODE_LOC_SEND; + else + skge->flow_control = FLOW_MODE_NONE; + } if (netif_running(dev)) skge_phy_reset(skge); + return 0; } @@ -854,6 +860,23 @@ static int skge_rx_fill(struct net_device *dev) return 0; } +static const char *skge_pause(enum pause_status status) +{ + switch(status) { + case FLOW_STAT_NONE: + return "none"; + case FLOW_STAT_REM_SEND: + return "rx only"; + case FLOW_STAT_LOC_SEND: + return "tx_only"; + case FLOW_STAT_SYMMETRIC: /* Both station may send PAUSE */ + return "both"; + default: + return "indeterminated"; + } +} + + static void skge_link_up(struct skge_port *skge) { skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), @@ -862,16 +885,13 @@ static void skge_link_up(struct skge_port *skge) netif_carrier_on(skge->netdev); netif_wake_queue(skge->netdev); - if (netif_msg_link(skge)) + if (netif_msg_link(skge)) { printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex, flow control %s\n", skge->netdev->name, skge->speed, skge->duplex == DUPLEX_FULL ? "full" : "half", - (skge->flow_control == FLOW_MODE_NONE) ? "none" : - (skge->flow_control == FLOW_MODE_LOC_SEND) ? "tx only" : - (skge->flow_control == FLOW_MODE_REM_SEND) ? "rx only" : - (skge->flow_control == FLOW_MODE_SYMMETRIC) ? "tx and rx" : - "unknown"); + skge_pause(skge->flow_status)); + } } static void skge_link_down(struct skge_port *skge) @@ -884,6 +904,29 @@ static void skge_link_down(struct skge_port *skge) printk(KERN_INFO PFX "%s: Link is down.\n", skge->netdev->name); } + +static void xm_link_down(struct skge_hw *hw, int port) +{ + struct net_device *dev = hw->dev[port]; + struct skge_port *skge = netdev_priv(dev); + u16 cmd, msk; + + if (hw->phy_type == SK_PHY_XMAC) { + msk = xm_read16(hw, port, XM_IMSK); + msk |= XM_IS_INP_ASS | XM_IS_LIPA_RC | XM_IS_RX_PAGE | XM_IS_AND; + xm_write16(hw, port, XM_IMSK, msk); + } + + cmd = xm_read16(hw, port, XM_MMU_CMD); + cmd &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX); + xm_write16(hw, port, XM_MMU_CMD, cmd); + /* dummy read to ensure writing */ + (void) xm_read16(hw, port, XM_MMU_CMD); + + if (netif_carrier_ok(dev)) + skge_link_down(skge); +} + static int __xm_phy_read(struct skge_hw *hw, int port, u16 reg, u16 *val) { int i; @@ -992,7 +1035,15 @@ static const u16 phy_pause_map[] = { [FLOW_MODE_NONE] = 0, [FLOW_MODE_LOC_SEND] = PHY_AN_PAUSE_ASYM, [FLOW_MODE_SYMMETRIC] = PHY_AN_PAUSE_CAP, - [FLOW_MODE_REM_SEND] = PHY_AN_PAUSE_CAP | PHY_AN_PAUSE_ASYM, + [FLOW_MODE_SYM_OR_REM] = PHY_AN_PAUSE_CAP | PHY_AN_PAUSE_ASYM, +}; + +/* special defines for FIBER (88E1011S only) */ +static const u16 fiber_pause_map[] = { + [FLOW_MODE_NONE] = PHY_X_P_NO_PAUSE, + [FLOW_MODE_LOC_SEND] = PHY_X_P_ASYM_MD, + [FLOW_MODE_SYMMETRIC] = PHY_X_P_SYM_MD, + [FLOW_MODE_SYM_OR_REM] = PHY_X_P_BOTH_MD, }; @@ -1008,14 +1059,7 @@ static void bcom_check_link(struct skge_hw *hw, int port) status = xm_phy_read(hw, port, PHY_BCOM_STAT); if ((status & PHY_ST_LSYNC) == 0) { - u16 cmd = xm_read16(hw, port, XM_MMU_CMD); - cmd &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX); - xm_write16(hw, port, XM_MMU_CMD, cmd); - /* dummy read to ensure writing */ - (void) xm_read16(hw, port, XM_MMU_CMD); - - if (netif_carrier_ok(dev)) - skge_link_down(skge); + xm_link_down(hw, port); return; } @@ -1048,20 +1092,19 @@ static void bcom_check_link(struct skge_hw *hw, int port) return; } - /* We are using IEEE 802.3z/D5.0 Table 37-4 */ switch (aux & PHY_B_AS_PAUSE_MSK) { case PHY_B_AS_PAUSE_MSK: - skge->flow_control = FLOW_MODE_SYMMETRIC; + skge->flow_status = FLOW_STAT_SYMMETRIC; break; case PHY_B_AS_PRR: - skge->flow_control = FLOW_MODE_REM_SEND; + skge->flow_status = FLOW_STAT_REM_SEND; break; case PHY_B_AS_PRT: - skge->flow_control = FLOW_MODE_LOC_SEND; + skge->flow_status = FLOW_STAT_LOC_SEND; break; default: - skge->flow_control = FLOW_MODE_NONE; + skge->flow_status = FLOW_STAT_NONE; } skge->speed = SPEED_1000; } @@ -1191,17 +1234,7 @@ static void xm_phy_init(struct skge_port *skge) if (skge->advertising & ADVERTISED_1000baseT_Full) ctrl |= PHY_X_AN_FD; - switch(skge->flow_control) { - case FLOW_MODE_NONE: - ctrl |= PHY_X_P_NO_PAUSE; - break; - case FLOW_MODE_LOC_SEND: - ctrl |= PHY_X_P_ASYM_MD; - break; - case FLOW_MODE_SYMMETRIC: - ctrl |= PHY_X_P_BOTH_MD; - break; - } + ctrl |= fiber_pause_map[skge->flow_control]; xm_phy_write(hw, port, PHY_XMAC_AUNE_ADV, ctrl); @@ -1235,14 +1268,7 @@ static void xm_check_link(struct net_device *dev) status = xm_phy_read(hw, port, PHY_XMAC_STAT); if ((status & PHY_ST_LSYNC) == 0) { - u16 cmd = xm_read16(hw, port, XM_MMU_CMD); - cmd &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX); - xm_write16(hw, port, XM_MMU_CMD, cmd); - /* dummy read to ensure writing */ - (void) xm_read16(hw, port, XM_MMU_CMD); - - if (netif_carrier_ok(dev)) - skge_link_down(skge); + xm_link_down(hw, port); return; } @@ -1276,15 +1302,20 @@ static void xm_check_link(struct net_device *dev) } /* We are using IEEE 802.3z/D5.0 Table 37-4 */ - if (lpa & PHY_X_P_SYM_MD) - skge->flow_control = FLOW_MODE_SYMMETRIC; - else if ((lpa & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) - skge->flow_control = FLOW_MODE_REM_SEND; - else if ((lpa & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) - skge->flow_control = FLOW_MODE_LOC_SEND; + if ((skge->flow_control == FLOW_MODE_SYMMETRIC || + skge->flow_control == FLOW_MODE_SYM_OR_REM) && + (lpa & PHY_X_P_SYM_MD)) + skge->flow_status = FLOW_STAT_SYMMETRIC; + else if (skge->flow_control == FLOW_MODE_SYM_OR_REM && + (lpa & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) + /* Enable PAUSE receive, disable PAUSE transmit */ + skge->flow_status = FLOW_STAT_REM_SEND; + else if (skge->flow_control == FLOW_MODE_LOC_SEND && + (lpa & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) + /* Disable PAUSE receive, enable PAUSE transmit */ + skge->flow_status = FLOW_STAT_LOC_SEND; else - skge->flow_control = FLOW_MODE_NONE; - + skge->flow_status = FLOW_STAT_NONE; skge->speed = SPEED_1000; } @@ -1568,6 +1599,10 @@ static void genesis_mac_intr(struct skge_hw *hw, int port) printk(KERN_DEBUG PFX "%s: mac interrupt status 0x%x\n", skge->netdev->name, status); + if (hw->phy_type == SK_PHY_XMAC && + (status & (XM_IS_INP_ASS | XM_IS_LIPA_RC))) + xm_link_down(hw, port); + if (status & XM_IS_TXF_UR) { xm_write32(hw, port, XM_MODE, XM_MD_FTF); ++skge->net_stats.tx_fifo_errors; @@ -1582,7 +1617,7 @@ static void genesis_link_up(struct skge_port *skge) { struct skge_hw *hw = skge->hw; int port = skge->port; - u16 cmd; + u16 cmd, msk; u32 mode; cmd = xm_read16(hw, port, XM_MMU_CMD); @@ -1591,8 +1626,8 @@ static void genesis_link_up(struct skge_port *skge) * enabling pause frame reception is required for 1000BT * because the XMAC is not reset if the link is going down */ - if (skge->flow_control == FLOW_MODE_NONE || - skge->flow_control == FLOW_MODE_LOC_SEND) + if (skge->flow_status == FLOW_STAT_NONE || + skge->flow_status == FLOW_STAT_LOC_SEND) /* Disable Pause Frame Reception */ cmd |= XM_MMU_IGN_PF; else @@ -1602,8 +1637,8 @@ static void genesis_link_up(struct skge_port *skge) xm_write16(hw, port, XM_MMU_CMD, cmd); mode = xm_read32(hw, port, XM_MODE); - if (skge->flow_control == FLOW_MODE_SYMMETRIC || - skge->flow_control == FLOW_MODE_LOC_SEND) { + if (skge->flow_status== FLOW_STAT_SYMMETRIC || + skge->flow_status == FLOW_STAT_LOC_SEND) { /* * Configure Pause Frame Generation * Use internal and external Pause Frame Generation. @@ -1631,7 +1666,11 @@ static void genesis_link_up(struct skge_port *skge) } xm_write32(hw, port, XM_MODE, mode); - xm_write16(hw, port, XM_IMSK, XM_DEF_MSK); + msk = XM_DEF_MSK; + if (hw->phy_type != SK_PHY_XMAC) + msk |= XM_IS_INP_ASS; /* disable GP0 interrupt bit */ + + xm_write16(hw, port, XM_IMSK, msk); xm_read16(hw, port, XM_ISRC); /* get MMU Command Reg. */ @@ -1779,11 +1818,17 @@ static void yukon_init(struct skge_hw *hw, int port) adv |= PHY_M_AN_10_FD; if (skge->advertising & ADVERTISED_10baseT_Half) adv |= PHY_M_AN_10_HD; - } else /* special defines for FIBER (88E1011S only) */ - adv |= PHY_M_AN_1000X_AHD | PHY_M_AN_1000X_AFD; - /* Set Flow-control capabilities */ - adv |= phy_pause_map[skge->flow_control]; + /* Set Flow-control capabilities */ + adv |= phy_pause_map[skge->flow_control]; + } else { + if (skge->advertising & ADVERTISED_1000baseT_Full) + adv |= PHY_M_AN_1000X_AFD; + if (skge->advertising & ADVERTISED_1000baseT_Half) + adv |= PHY_M_AN_1000X_AHD; + + adv |= fiber_pause_map[skge->flow_control]; + } /* Restart Auto-negotiation */ ctrl |= PHY_CT_ANE | PHY_CT_RE_CFG; @@ -1917,6 +1962,11 @@ static void yukon_mac_init(struct skge_hw *hw, int port) case FLOW_MODE_LOC_SEND: /* disable Rx flow-control */ reg |= GM_GPCR_FC_RX_DIS | GM_GPCR_AU_FCT_DIS; + break; + case FLOW_MODE_SYMMETRIC: + case FLOW_MODE_SYM_OR_REM: + /* enable Tx & Rx flow-control */ + break; } gma_write16(hw, port, GM_GP_CTRL, reg); @@ -2111,13 +2161,11 @@ static void yukon_link_down(struct skge_port *skge) ctrl &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); gma_write16(hw, port, GM_GP_CTRL, ctrl); - if (skge->flow_control == FLOW_MODE_REM_SEND) { + if (skge->flow_status == FLOW_STAT_REM_SEND) { + ctrl = gm_phy_read(hw, port, PHY_MARV_AUNE_ADV); + ctrl |= PHY_M_AN_ASP; /* restore Asymmetric Pause bit */ - gm_phy_write(hw, port, PHY_MARV_AUNE_ADV, - gm_phy_read(hw, port, - PHY_MARV_AUNE_ADV) - | PHY_M_AN_ASP); - + gm_phy_write(hw, port, PHY_MARV_AUNE_ADV, ctrl); } yukon_reset(hw, port); @@ -2164,19 +2212,19 @@ static void yukon_phy_intr(struct skge_port *skge) /* We are using IEEE 802.3z/D5.0 Table 37-4 */ switch (phystat & PHY_M_PS_PAUSE_MSK) { case PHY_M_PS_PAUSE_MSK: - skge->flow_control = FLOW_MODE_SYMMETRIC; + skge->flow_status = FLOW_STAT_SYMMETRIC; break; case PHY_M_PS_RX_P_EN: - skge->flow_control = FLOW_MODE_REM_SEND; + skge->flow_status = FLOW_STAT_REM_SEND; break; case PHY_M_PS_TX_P_EN: - skge->flow_control = FLOW_MODE_LOC_SEND; + skge->flow_status = FLOW_STAT_LOC_SEND; break; default: - skge->flow_control = FLOW_MODE_NONE; + skge->flow_status = FLOW_STAT_NONE; } - if (skge->flow_control == FLOW_MODE_NONE || + if (skge->flow_status == FLOW_STAT_NONE || (skge->speed < SPEED_1000 && skge->duplex == DUPLEX_HALF)) skge_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF); else @@ -3399,7 +3447,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, /* Auto speed and flow control */ skge->autoneg = AUTONEG_ENABLE; - skge->flow_control = FLOW_MODE_SYMMETRIC; + skge->flow_control = FLOW_MODE_SYM_OR_REM; skge->duplex = -1; skge->speed = -1; skge->advertising = skge_supported_modes(hw); diff --git a/drivers/net/skge.h b/drivers/net/skge.h index d0b47d46cf9d..537c0aaa1db8 100644 --- a/drivers/net/skge.h +++ b/drivers/net/skge.h @@ -2195,7 +2195,8 @@ enum { XM_IS_RX_COMP = 1<<0, /* Bit 0: Frame Rx Complete */ }; -#define XM_DEF_MSK (~(XM_IS_RXC_OV | XM_IS_TXC_OV | XM_IS_RXF_OV | XM_IS_TXF_UR)) +#define XM_DEF_MSK (~(XM_IS_INP_ASS | XM_IS_LIPA_RC | \ + XM_IS_RXF_OV | XM_IS_TXF_UR)) /* XM_HW_CFG 16 bit r/w Hardware Config Register */ @@ -2426,13 +2427,24 @@ struct skge_hw { struct mutex phy_mutex; }; -enum { - FLOW_MODE_NONE = 0, /* No Flow-Control */ - FLOW_MODE_LOC_SEND = 1, /* Local station sends PAUSE */ - FLOW_MODE_REM_SEND = 2, /* Symmetric or just remote */ +enum pause_control { + FLOW_MODE_NONE = 1, /* No Flow-Control */ + FLOW_MODE_LOC_SEND = 2, /* Local station sends PAUSE */ FLOW_MODE_SYMMETRIC = 3, /* Both stations may send PAUSE */ + FLOW_MODE_SYM_OR_REM = 4, /* Both stations may send PAUSE or + * just the remote station may send PAUSE + */ +}; + +enum pause_status { + FLOW_STAT_INDETERMINATED=0, /* indeterminated */ + FLOW_STAT_NONE, /* No Flow Control */ + FLOW_STAT_REM_SEND, /* Remote Station sends PAUSE */ + FLOW_STAT_LOC_SEND, /* Local station sends PAUSE */ + FLOW_STAT_SYMMETRIC, /* Both station may send PAUSE */ }; + struct skge_port { u32 msg_enable; struct skge_hw *hw; @@ -2445,9 +2457,10 @@ struct skge_port { struct net_device_stats net_stats; struct work_struct link_thread; + enum pause_control flow_control; + enum pause_status flow_status; u8 rx_csum; u8 blink_on; - u8 flow_control; u8 wol; u8 autoneg; /* AUTONEG_ENABLE, AUTONEG_DISABLE */ u8 duplex; /* DUPLEX_HALF, DUPLEX_FULL */ diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 459c845d6648..c10e7f5faa5f 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -683,7 +683,7 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) sky2_write16(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_OPER_ON); if (hw->chip_id == CHIP_ID_YUKON_EC_U) { - sky2_write8(hw, SK_REG(port, RX_GMF_LP_THR), 768/8); + sky2_write8(hw, SK_REG(port, RX_GMF_LP_THR), 512/8); sky2_write8(hw, SK_REG(port, RX_GMF_UP_THR), 1024/8); if (hw->dev[port]->mtu > ETH_DATA_LEN) { /* set Tx GMAC FIFO Almost Empty Threshold */ @@ -1907,7 +1907,7 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2, pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr, length, PCI_DMA_FROMDEVICE); re->skb->ip_summed = CHECKSUM_NONE; - __skb_put(skb, length); + skb_put(skb, length); } return skb; } @@ -1970,7 +1970,7 @@ static struct sk_buff *receive_new(struct sky2_port *sky2, if (skb_shinfo(skb)->nr_frags) skb_put_frags(skb, hdr_space, length); else - skb_put(skb, hdr_space); + skb_put(skb, length); return skb; } @@ -2220,8 +2220,7 @@ static void sky2_hw_intr(struct sky2_hw *hw) /* PCI-Express uncorrectable Error occurred */ u32 pex_err; - pex_err = sky2_pci_read32(hw, - hw->err_cap + PCI_ERR_UNCOR_STATUS); + pex_err = sky2_pci_read32(hw, PEX_UNC_ERR_STAT); if (net_ratelimit()) printk(KERN_ERR PFX "%s: pci express error (0x%x)\n", @@ -2229,20 +2228,15 @@ static void sky2_hw_intr(struct sky2_hw *hw) /* clear the interrupt */ sky2_write32(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); - sky2_pci_write32(hw, - hw->err_cap + PCI_ERR_UNCOR_STATUS, - 0xffffffffUL); + sky2_pci_write32(hw, PEX_UNC_ERR_STAT, + 0xffffffffUL); sky2_write32(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); - - /* In case of fatal error mask off to keep from getting stuck */ - if (pex_err & (PCI_ERR_UNC_POISON_TLP | PCI_ERR_UNC_FCP - | PCI_ERR_UNC_DLP)) { + if (pex_err & PEX_FATAL_ERRORS) { u32 hwmsk = sky2_read32(hw, B0_HWE_IMSK); hwmsk &= ~Y2_IS_PCI_EXP; sky2_write32(hw, B0_HWE_IMSK, hwmsk); } - } if (status & Y2_HWE_L1_MASK) @@ -2423,7 +2417,6 @@ static int sky2_reset(struct sky2_hw *hw) u16 status; u8 t8; int i; - u32 msk; sky2_write8(hw, B0_CTST, CS_RST_CLR); @@ -2464,13 +2457,9 @@ static int sky2_reset(struct sky2_hw *hw) sky2_write8(hw, B0_CTST, CS_MRST_CLR); /* clear any PEX errors */ - if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) { - hw->err_cap = pci_find_ext_capability(hw->pdev, PCI_EXT_CAP_ID_ERR); - if (hw->err_cap) - sky2_pci_write32(hw, - hw->err_cap + PCI_ERR_UNCOR_STATUS, - 0xffffffffUL); - } + if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) + sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL); + hw->pmd_type = sky2_read8(hw, B2_PMD_TYP); hw->ports = 1; @@ -2527,10 +2516,7 @@ static int sky2_reset(struct sky2_hw *hw) sky2_write8(hw, RAM_BUFFER(i, B3_RI_RTO_XS2), SK_RI_TO_53); } - msk = Y2_HWE_ALL_MASK; - if (!hw->err_cap) - msk &= ~Y2_IS_PCI_EXP; - sky2_write32(hw, B0_HWE_IMSK, msk); + sky2_write32(hw, B0_HWE_IMSK, Y2_HWE_ALL_MASK); for (i = 0; i < hw->ports; i++) sky2_gmac_reset(hw, i); diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index f66109a96d95..43d2accf60e1 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h @@ -6,15 +6,24 @@ #define ETH_JUMBO_MTU 9000 /* Maximum MTU supported */ -/* PCI device specific config registers */ +/* PCI config registers */ enum { PCI_DEV_REG1 = 0x40, PCI_DEV_REG2 = 0x44, + PCI_DEV_STATUS = 0x7c, PCI_DEV_REG3 = 0x80, PCI_DEV_REG4 = 0x84, PCI_DEV_REG5 = 0x88, }; +enum { + PEX_DEV_CAP = 0xe4, + PEX_DEV_CTRL = 0xe8, + PEX_DEV_STA = 0xea, + PEX_LNK_STAT = 0xf2, + PEX_UNC_ERR_STAT= 0x104, +}; + /* Yukon-2 */ enum pci_dev_reg_1 { PCI_Y2_PIG_ENA = 1<<31, /* Enable Plug-in-Go (YUKON-2) */ @@ -63,6 +72,39 @@ enum pci_dev_reg_4 { PCI_STATUS_REC_MASTER_ABORT | \ PCI_STATUS_REC_TARGET_ABORT | \ PCI_STATUS_PARITY) + +enum pex_dev_ctrl { + PEX_DC_MAX_RRS_MSK = 7<<12, /* Bit 14..12: Max. Read Request Size */ + PEX_DC_EN_NO_SNOOP = 1<<11,/* Enable No Snoop */ + PEX_DC_EN_AUX_POW = 1<<10,/* Enable AUX Power */ + PEX_DC_EN_PHANTOM = 1<<9, /* Enable Phantom Functions */ + PEX_DC_EN_EXT_TAG = 1<<8, /* Enable Extended Tag Field */ + PEX_DC_MAX_PLS_MSK = 7<<5, /* Bit 7.. 5: Max. Payload Size Mask */ + PEX_DC_EN_REL_ORD = 1<<4, /* Enable Relaxed Ordering */ + PEX_DC_EN_UNS_RQ_RP = 1<<3, /* Enable Unsupported Request Reporting */ + PEX_DC_EN_FAT_ER_RP = 1<<2, /* Enable Fatal Error Reporting */ + PEX_DC_EN_NFA_ER_RP = 1<<1, /* Enable Non-Fatal Error Reporting */ + PEX_DC_EN_COR_ER_RP = 1<<0, /* Enable Correctable Error Reporting */ +}; +#define PEX_DC_MAX_RD_RQ_SIZE(x) (((x)<<12) & PEX_DC_MAX_RRS_MSK) + +/* PEX_UNC_ERR_STAT PEX Uncorrectable Errors Status Register (Yukon-2) */ +enum pex_err { + PEX_UNSUP_REQ = 1<<20, /* Unsupported Request Error */ + + PEX_MALFOR_TLP = 1<<18, /* Malformed TLP */ + + PEX_UNEXP_COMP = 1<<16, /* Unexpected Completion */ + + PEX_COMP_TO = 1<<14, /* Completion Timeout */ + PEX_FLOW_CTRL_P = 1<<13, /* Flow Control Protocol Error */ + PEX_POIS_TLP = 1<<12, /* Poisoned TLP */ + + PEX_DATA_LINK_P = 1<<4, /* Data Link Protocol Error */ + PEX_FATAL_ERRORS= (PEX_MALFOR_TLP | PEX_FLOW_CTRL_P | PEX_DATA_LINK_P), +}; + + enum csr_regs { B0_RAP = 0x0000, B0_CTST = 0x0004, @@ -1836,7 +1878,6 @@ struct sky2_hw { struct net_device *dev[2]; int pm_cap; - int err_cap; u8 chip_id; u8 chip_rev; u8 pmd_type; diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 506807fa5268..95b6478f55c6 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -1400,7 +1400,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) static void smc_poll_controller(struct net_device *dev) { disable_irq(dev->irq); - smc_interrupt(dev->irq, dev, NULL); + smc_interrupt(dev->irq, dev); enable_irq(dev->irq); } #endif diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h index 636dbfcdf8cb..0c9f1e7dab2e 100644 --- a/drivers/net/smc91x.h +++ b/drivers/net/smc91x.h @@ -398,6 +398,24 @@ static inline void LPD7_SMC_outsw (unsigned char* a, int r, #define SMC_IRQ_FLAGS (0) +#elif defined(CONFIG_ARCH_VERSATILE) + +#define SMC_CAN_USE_8BIT 1 +#define SMC_CAN_USE_16BIT 1 +#define SMC_CAN_USE_32BIT 1 +#define SMC_NOWAIT 1 + +#define SMC_inb(a, r) readb((a) + (r)) +#define SMC_inw(a, r) readw((a) + (r)) +#define SMC_inl(a, r) readl((a) + (r)) +#define SMC_outb(v, a, r) writeb(v, (a) + (r)) +#define SMC_outw(v, a, r) writew(v, (a) + (r)) +#define SMC_outl(v, a, r) writel(v, (a) + (r)) +#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) +#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) + +#define SMC_IRQ_FLAGS (0) + #else #define SMC_CAN_USE_8BIT 1 diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c index cfece9676aff..ed7aa0a5acca 100644 --- a/drivers/net/sonic.c +++ b/drivers/net/sonic.c @@ -295,15 +295,10 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) */ static irqreturn_t sonic_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; + struct net_device *dev = dev_id; struct sonic_local *lp = netdev_priv(dev); int status; - if (dev == NULL) { - printk(KERN_ERR "sonic_interrupt: irq %d for unknown device.\n", irq); - return IRQ_NONE; - } - if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)) return IRQ_NONE; diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 46a009085f7c..418138dd6c68 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -55,12 +55,13 @@ MODULE_AUTHOR("Utz Bacher <utz.bacher@de.ibm.com> and Jens Osterkamp " \ "<Jens.Osterkamp@de.ibm.com>"); MODULE_DESCRIPTION("Spider Southbridge Gigabit Ethernet driver"); MODULE_LICENSE("GPL"); +MODULE_VERSION(VERSION); static int rx_descriptors = SPIDER_NET_RX_DESCRIPTORS_DEFAULT; static int tx_descriptors = SPIDER_NET_TX_DESCRIPTORS_DEFAULT; -module_param(rx_descriptors, int, 0644); -module_param(tx_descriptors, int, 0644); +module_param(rx_descriptors, int, 0444); +module_param(tx_descriptors, int, 0444); MODULE_PARM_DESC(rx_descriptors, "number of descriptors used " \ "in rx chains"); @@ -300,7 +301,7 @@ static int spider_net_init_chain(struct spider_net_card *card, struct spider_net_descr_chain *chain, struct spider_net_descr *start_descr, - int direction, int no) + int no) { int i; struct spider_net_descr *descr; @@ -315,7 +316,7 @@ spider_net_init_chain(struct spider_net_card *card, buf = pci_map_single(card->pdev, descr, SPIDER_NET_DESCR_SIZE, - direction); + PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(buf)) goto iommu_error; @@ -329,11 +330,6 @@ spider_net_init_chain(struct spider_net_card *card, (descr-1)->next = start_descr; start_descr->prev = descr-1; - descr = start_descr; - if (direction == PCI_DMA_FROMDEVICE) - for (i=0; i < no; i++, descr++) - descr->next_descr_addr = descr->next->bus_addr; - spin_lock_init(&chain->lock); chain->head = start_descr; chain->tail = start_descr; @@ -346,7 +342,7 @@ iommu_error: if (descr->bus_addr) pci_unmap_single(card->pdev, descr->bus_addr, SPIDER_NET_DESCR_SIZE, - direction); + PCI_DMA_BIDIRECTIONAL); return -ENOMEM; } @@ -362,15 +358,15 @@ spider_net_free_rx_chain_contents(struct spider_net_card *card) struct spider_net_descr *descr; descr = card->rx_chain.head; - while (descr->next != card->rx_chain.head) { + do { if (descr->skb) { dev_kfree_skb(descr->skb); pci_unmap_single(card->pdev, descr->buf_addr, SPIDER_NET_MAX_FRAME, - PCI_DMA_FROMDEVICE); + PCI_DMA_BIDIRECTIONAL); } descr = descr->next; - } + } while (descr != card->rx_chain.head); } /** @@ -645,26 +641,41 @@ static int spider_net_prepare_tx_descr(struct spider_net_card *card, struct sk_buff *skb) { - struct spider_net_descr *descr = card->tx_chain.head; + struct spider_net_descr *descr; dma_addr_t buf; + unsigned long flags; + int length; - buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); + length = skb->len; + if (length < ETH_ZLEN) { + if (skb_pad(skb, ETH_ZLEN-length)) + return 0; + length = ETH_ZLEN; + } + + buf = pci_map_single(card->pdev, skb->data, length, PCI_DMA_TODEVICE); if (pci_dma_mapping_error(buf)) { if (netif_msg_tx_err(card) && net_ratelimit()) pr_err("could not iommu-map packet (%p, %i). " - "Dropping packet\n", skb->data, skb->len); + "Dropping packet\n", skb->data, length); card->spider_stats.tx_iommu_map_error++; return -ENOMEM; } + spin_lock_irqsave(&card->tx_chain.lock, flags); + descr = card->tx_chain.head; + card->tx_chain.head = descr->next; + descr->buf_addr = buf; - descr->buf_size = skb->len; + descr->buf_size = length; descr->next_descr_addr = 0; descr->skb = skb; descr->data_status = 0; descr->dmac_cmd_status = SPIDER_NET_DESCR_CARDOWNED | SPIDER_NET_DMAC_NOCS; + spin_unlock_irqrestore(&card->tx_chain.lock, flags); + if (skb->protocol == htons(ETH_P_IP)) switch (skb->nh.iph->protocol) { case IPPROTO_TCP: @@ -675,32 +686,51 @@ spider_net_prepare_tx_descr(struct spider_net_card *card, break; } + /* Chain the bus address, so that the DMA engine finds this descr. */ descr->prev->next_descr_addr = descr->bus_addr; + card->netdev->trans_start = jiffies; /* set netdev watchdog timer */ return 0; } -/** - * spider_net_release_tx_descr - processes a used tx descriptor - * @card: card structure - * @descr: descriptor to release - * - * releases a used tx descriptor (unmapping, freeing of skb) - */ -static inline void -spider_net_release_tx_descr(struct spider_net_card *card) +static int +spider_net_set_low_watermark(struct spider_net_card *card) { + unsigned long flags; + int status; + int cnt=0; + int i; struct spider_net_descr *descr = card->tx_chain.tail; - struct sk_buff *skb; - card->tx_chain.tail = card->tx_chain.tail->next; - descr->dmac_cmd_status |= SPIDER_NET_DESCR_NOT_IN_USE; + /* Measure the length of the queue. Measurement does not + * need to be precise -- does not need a lock. */ + while (descr != card->tx_chain.head) { + status = descr->dmac_cmd_status & SPIDER_NET_DESCR_NOT_IN_USE; + if (status == SPIDER_NET_DESCR_NOT_IN_USE) + break; + descr = descr->next; + cnt++; + } - /* unmap the skb */ - skb = descr->skb; - pci_unmap_single(card->pdev, descr->buf_addr, skb->len, - PCI_DMA_TODEVICE); - dev_kfree_skb_any(skb); + /* If TX queue is short, don't even bother with interrupts */ + if (cnt < card->num_tx_desc/4) + return cnt; + + /* Set low-watermark 3/4th's of the way into the queue. */ + descr = card->tx_chain.tail; + cnt = (cnt*3)/4; + for (i=0;i<cnt; i++) + descr = descr->next; + + /* Set the new watermark, clear the old watermark */ + spin_lock_irqsave(&card->tx_chain.lock, flags); + descr->dmac_cmd_status |= SPIDER_NET_DESCR_TXDESFLG; + if (card->low_watermark && card->low_watermark != descr) + card->low_watermark->dmac_cmd_status = + card->low_watermark->dmac_cmd_status & ~SPIDER_NET_DESCR_TXDESFLG; + card->low_watermark = descr; + spin_unlock_irqrestore(&card->tx_chain.lock, flags); + return cnt; } /** @@ -719,21 +749,29 @@ static int spider_net_release_tx_chain(struct spider_net_card *card, int brutal) { struct spider_net_descr_chain *chain = &card->tx_chain; + struct spider_net_descr *descr; + struct sk_buff *skb; + u32 buf_addr; + unsigned long flags; int status; - spider_net_read_reg(card, SPIDER_NET_GDTDMACCNTR); - while (chain->tail != chain->head) { - status = spider_net_get_descr_status(chain->tail); + spin_lock_irqsave(&chain->lock, flags); + descr = chain->tail; + + status = spider_net_get_descr_status(descr); switch (status) { case SPIDER_NET_DESCR_COMPLETE: card->netdev_stats.tx_packets++; - card->netdev_stats.tx_bytes += chain->tail->skb->len; + card->netdev_stats.tx_bytes += descr->skb->len; break; case SPIDER_NET_DESCR_CARDOWNED: - if (!brutal) + if (!brutal) { + spin_unlock_irqrestore(&chain->lock, flags); return 1; + } + /* fallthrough, if we release the descriptors * brutally (then we don't care about * SPIDER_NET_DESCR_CARDOWNED) */ @@ -750,11 +788,25 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal) default: card->netdev_stats.tx_dropped++; - return 1; + if (!brutal) { + spin_unlock_irqrestore(&chain->lock, flags); + return 1; + } } - spider_net_release_tx_descr(card); - } + chain->tail = descr->next; + descr->dmac_cmd_status |= SPIDER_NET_DESCR_NOT_IN_USE; + skb = descr->skb; + buf_addr = descr->buf_addr; + spin_unlock_irqrestore(&chain->lock, flags); + + /* unmap the skb */ + if (skb) { + int len = skb->len < ETH_ZLEN ? ETH_ZLEN : skb->len; + pci_unmap_single(card->pdev, buf_addr, len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); + } + } return 0; } @@ -763,8 +815,12 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal) * @card: card structure * @descr: descriptor address to enable TX processing at * - * spider_net_kick_tx_dma writes the current tx chain head as start address - * of the tx descriptor chain and enables the transmission DMA engine + * This routine will start the transmit DMA running if + * it is not already running. This routine ned only be + * called when queueing a new packet to an empty tx queue. + * Writes the current tx chain head as start address + * of the tx descriptor chain and enables the transmission + * DMA engine. */ static inline void spider_net_kick_tx_dma(struct spider_net_card *card) @@ -804,65 +860,43 @@ out: static int spider_net_xmit(struct sk_buff *skb, struct net_device *netdev) { + int cnt; struct spider_net_card *card = netdev_priv(netdev); struct spider_net_descr_chain *chain = &card->tx_chain; - struct spider_net_descr *descr = chain->head; - unsigned long flags; - int result; - - spin_lock_irqsave(&chain->lock, flags); spider_net_release_tx_chain(card, 0); - if (chain->head->next == chain->tail->prev) { - card->netdev_stats.tx_dropped++; - result = NETDEV_TX_LOCKED; - goto out; - } + if ((chain->head->next == chain->tail->prev) || + (spider_net_prepare_tx_descr(card, skb) != 0)) { - if (spider_net_get_descr_status(descr) != SPIDER_NET_DESCR_NOT_IN_USE) { card->netdev_stats.tx_dropped++; - result = NETDEV_TX_LOCKED; - goto out; + netif_stop_queue(netdev); + return NETDEV_TX_BUSY; } - if (spider_net_prepare_tx_descr(card, skb) != 0) { - card->netdev_stats.tx_dropped++; - result = NETDEV_TX_BUSY; - goto out; - } - - result = NETDEV_TX_OK; - - spider_net_kick_tx_dma(card); - card->tx_chain.head = card->tx_chain.head->next; - -out: - spin_unlock_irqrestore(&chain->lock, flags); - netif_wake_queue(netdev); - return result; + cnt = spider_net_set_low_watermark(card); + if (cnt < 5) + spider_net_kick_tx_dma(card); + return NETDEV_TX_OK; } /** * spider_net_cleanup_tx_ring - cleans up the TX ring * @card: card structure * - * spider_net_cleanup_tx_ring is called by the tx_timer (as we don't use - * interrupts to cleanup our TX ring) and returns sent packets to the stack - * by freeing them + * spider_net_cleanup_tx_ring is called by either the tx_timer + * or from the NAPI polling routine. + * This routine releases resources associted with transmitted + * packets, including updating the queue tail pointer. */ static void spider_net_cleanup_tx_ring(struct spider_net_card *card) { - unsigned long flags; - - spin_lock_irqsave(&card->tx_chain.lock, flags); - if ((spider_net_release_tx_chain(card, 0) != 0) && - (card->netdev->flags & IFF_UP)) + (card->netdev->flags & IFF_UP)) { spider_net_kick_tx_dma(card); - - spin_unlock_irqrestore(&card->tx_chain.lock, flags); + netif_wake_queue(card->netdev); + } } /** @@ -1053,6 +1087,7 @@ spider_net_poll(struct net_device *netdev, int *budget) int packets_to_do, packets_done = 0; int no_more_packets = 0; + spider_net_cleanup_tx_ring(card); packets_to_do = min(*budget, netdev->quota); while (packets_to_do) { @@ -1243,12 +1278,15 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) case SPIDER_NET_PHYINT: case SPIDER_NET_GMAC2INT: case SPIDER_NET_GMAC1INT: - case SPIDER_NET_GIPSINT: case SPIDER_NET_GFIFOINT: case SPIDER_NET_DMACINT: case SPIDER_NET_GSYSINT: break; */ + case SPIDER_NET_GIPSINT: + show_error = 0; + break; + case SPIDER_NET_GPWOPCMPINT: /* PHY write operation completed */ show_error = 0; @@ -1307,9 +1345,10 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) case SPIDER_NET_GDTDCEINT: /* chain end. If a descriptor should be sent, kick off * tx dma - if (card->tx_chain.tail == card->tx_chain.head) + if (card->tx_chain.tail != card->tx_chain.head) spider_net_kick_tx_dma(card); - show_error = 0; */ + */ + show_error = 0; break; /* case SPIDER_NET_G1TMCNTINT: not used. print a message */ @@ -1354,7 +1393,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) if (netif_msg_intr(card)) pr_err("got descriptor chain end interrupt, " "restarting DMAC %c.\n", - 'D'+i-SPIDER_NET_GDDDCEINT); + 'D'-(i-SPIDER_NET_GDDDCEINT)/3); spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); show_error = 0; @@ -1423,8 +1462,9 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) } if ((show_error) && (netif_msg_intr(card))) - pr_err("Got error interrupt, GHIINT0STS = 0x%08x, " + pr_err("Got error interrupt on %s, GHIINT0STS = 0x%08x, " "GHIINT1STS = 0x%08x, GHIINT2STS = 0x%08x\n", + card->netdev->name, status_reg, error_reg1, error_reg2); /* clear interrupt sources */ @@ -1460,6 +1500,8 @@ spider_net_interrupt(int irq, void *ptr) spider_net_rx_irq_off(card); netif_rx_schedule(netdev); } + if (status_reg & SPIDER_NET_TXINT) + netif_rx_schedule(netdev); if (status_reg & SPIDER_NET_ERRINT ) spider_net_handle_error_irq(card, status_reg); @@ -1599,7 +1641,7 @@ spider_net_enable_card(struct spider_net_card *card) SPIDER_NET_INT2_MASK_VALUE); spider_net_write_reg(card, SPIDER_NET_GDTDMACCNTR, - SPIDER_NET_GDTDCEIDIS); + SPIDER_NET_GDTBSTA | SPIDER_NET_GDTDCEIDIS); } /** @@ -1615,17 +1657,26 @@ int spider_net_open(struct net_device *netdev) { struct spider_net_card *card = netdev_priv(netdev); - int result; + struct spider_net_descr *descr; + int i, result; result = -ENOMEM; if (spider_net_init_chain(card, &card->tx_chain, card->descr, - PCI_DMA_TODEVICE, card->tx_desc)) + card->num_tx_desc)) goto alloc_tx_failed; + + card->low_watermark = NULL; + + /* rx_chain is after tx_chain, so offset is descr + tx_count */ if (spider_net_init_chain(card, &card->rx_chain, - card->descr + card->rx_desc, - PCI_DMA_FROMDEVICE, card->rx_desc)) + card->descr + card->num_tx_desc, + card->num_rx_desc)) goto alloc_rx_failed; + descr = card->rx_chain.head; + for (i=0; i < card->num_rx_desc; i++, descr++) + descr->next_descr_addr = descr->next->bus_addr; + /* allocate rx skbs */ if (spider_net_alloc_rx_skbs(card)) goto alloc_skbs_failed; @@ -1878,10 +1929,7 @@ spider_net_stop(struct net_device *netdev) spider_net_disable_rxdmac(card); /* release chains */ - if (spin_trylock(&card->tx_chain.lock)) { - spider_net_release_tx_chain(card, 1); - spin_unlock(&card->tx_chain.lock); - } + spider_net_release_tx_chain(card, 1); spider_net_free_chain(card, &card->tx_chain); spider_net_free_chain(card, &card->rx_chain); @@ -2012,8 +2060,8 @@ spider_net_setup_netdev(struct spider_net_card *card) card->options.rx_csum = SPIDER_NET_RX_CSUM_DEFAULT; - card->tx_desc = tx_descriptors; - card->rx_desc = rx_descriptors; + card->num_tx_desc = tx_descriptors; + card->num_rx_desc = rx_descriptors; spider_net_setup_netdev_ops(netdev); @@ -2252,6 +2300,8 @@ static struct pci_driver spider_net_driver = { */ static int __init spider_net_init(void) { + printk(KERN_INFO "Spidernet version %s.\n", VERSION); + if (rx_descriptors < SPIDER_NET_RX_DESCRIPTORS_MIN) { rx_descriptors = SPIDER_NET_RX_DESCRIPTORS_MIN; pr_info("adjusting rx descriptors to %i.\n", rx_descriptors); diff --git a/drivers/net/spider_net.h b/drivers/net/spider_net.h index a59deda2f95e..b3b46119b424 100644 --- a/drivers/net/spider_net.h +++ b/drivers/net/spider_net.h @@ -24,6 +24,8 @@ #ifndef _SPIDER_NET_H #define _SPIDER_NET_H +#define VERSION "1.1 A" + #include "sungem_phy.h" extern int spider_net_stop(struct net_device *netdev); @@ -47,7 +49,7 @@ extern char spider_net_driver_name[]; #define SPIDER_NET_TX_DESCRIPTORS_MIN 16 #define SPIDER_NET_TX_DESCRIPTORS_MAX 512 -#define SPIDER_NET_TX_TIMER 20 +#define SPIDER_NET_TX_TIMER (HZ/5) #define SPIDER_NET_RX_CSUM_DEFAULT 1 @@ -189,7 +191,9 @@ extern char spider_net_driver_name[]; #define SPIDER_NET_MACMODE_VALUE 0x00000001 #define SPIDER_NET_BURSTLMT_VALUE 0x00000200 /* about 16 us */ -/* 1(0) enable r/tx dma +/* DMAC control register GDMACCNTR + * + * 1(0) enable r/tx dma * 0000000 fixed to 0 * * 000000 fixed to 0 @@ -198,6 +202,7 @@ extern char spider_net_driver_name[]; * * 000000 fixed to 0 * 00 burst alignment: 128 bytes + * 11 burst alignment: 1024 bytes * * 00000 fixed to 0 * 0 descr writeback size 32 bytes @@ -208,10 +213,13 @@ extern char spider_net_driver_name[]; #define SPIDER_NET_DMA_RX_VALUE 0x80000000 #define SPIDER_NET_DMA_RX_FEND_VALUE 0x00030003 /* to set TX_DMA_EN */ -#define SPIDER_NET_TX_DMA_EN 0x80000000 -#define SPIDER_NET_GDTDCEIDIS 0x00000002 -#define SPIDER_NET_DMA_TX_VALUE SPIDER_NET_TX_DMA_EN | \ - SPIDER_NET_GDTDCEIDIS +#define SPIDER_NET_TX_DMA_EN 0x80000000 +#define SPIDER_NET_GDTBSTA 0x00000300 +#define SPIDER_NET_GDTDCEIDIS 0x00000002 +#define SPIDER_NET_DMA_TX_VALUE SPIDER_NET_TX_DMA_EN | \ + SPIDER_NET_GDTBSTA | \ + SPIDER_NET_GDTDCEIDIS + #define SPIDER_NET_DMA_TX_FEND_VALUE 0x00030003 /* SPIDER_NET_UA_DESCR_VALUE is OR'ed with the unicast address */ @@ -320,13 +328,10 @@ enum spider_net_int2_status { SPIDER_NET_GRISPDNGINT }; -#define SPIDER_NET_TXINT ( (1 << SPIDER_NET_GTTEDINT) | \ - (1 << SPIDER_NET_GDTDCEINT) | \ - (1 << SPIDER_NET_GDTFDCINT) ) +#define SPIDER_NET_TXINT ( (1 << SPIDER_NET_GDTFDCINT) ) -/* we rely on flagged descriptor interrupts*/ -#define SPIDER_NET_RXINT ( (1 << SPIDER_NET_GDAFDCINT) | \ - (1 << SPIDER_NET_GRMFLLINT) ) +/* We rely on flagged descriptor interrupts */ +#define SPIDER_NET_RXINT ( (1 << SPIDER_NET_GDAFDCINT) ) #define SPIDER_NET_ERRINT ( 0xffffffff & \ (~SPIDER_NET_TXINT) & \ @@ -349,6 +354,7 @@ enum spider_net_int2_status { #define SPIDER_NET_DESCR_FORCE_END 0x50000000 /* used in rx and tx */ #define SPIDER_NET_DESCR_CARDOWNED 0xA0000000 /* used in rx and tx */ #define SPIDER_NET_DESCR_NOT_IN_USE 0xF0000000 +#define SPIDER_NET_DESCR_TXDESFLG 0x00800000 struct spider_net_descr { /* as defined by the hardware */ @@ -433,6 +439,7 @@ struct spider_net_card { struct spider_net_descr_chain tx_chain; struct spider_net_descr_chain rx_chain; + struct spider_net_descr *low_watermark; struct net_device_stats netdev_stats; @@ -448,8 +455,8 @@ struct spider_net_card { /* for ethtool */ int msg_enable; - int rx_desc; - int tx_desc; + int num_rx_desc; + int num_tx_desc; struct spider_net_extra_stats spider_stats; struct spider_net_descr descr[0]; diff --git a/drivers/net/spider_net_ethtool.c b/drivers/net/spider_net_ethtool.c index 589e43658dee..91b995102915 100644 --- a/drivers/net/spider_net_ethtool.c +++ b/drivers/net/spider_net_ethtool.c @@ -76,7 +76,7 @@ spider_net_ethtool_get_drvinfo(struct net_device *netdev, /* clear and fill out info */ memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); strncpy(drvinfo->driver, spider_net_driver_name, 32); - strncpy(drvinfo->version, "0.1", 32); + strncpy(drvinfo->version, VERSION, 32); strcpy(drvinfo->fw_version, "no information"); strncpy(drvinfo->bus_info, pci_name(card->pdev), 32); } @@ -158,9 +158,9 @@ spider_net_ethtool_get_ringparam(struct net_device *netdev, struct spider_net_card *card = netdev->priv; ering->tx_max_pending = SPIDER_NET_TX_DESCRIPTORS_MAX; - ering->tx_pending = card->tx_desc; + ering->tx_pending = card->num_tx_desc; ering->rx_max_pending = SPIDER_NET_RX_DESCRIPTORS_MAX; - ering->rx_pending = card->rx_desc; + ering->rx_pending = card->num_rx_desc; } static int spider_net_get_stats_count(struct net_device *netdev) diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c index d1d1885b0295..a3220a96524f 100644 --- a/drivers/net/sun3_82586.c +++ b/drivers/net/sun3_82586.c @@ -330,7 +330,7 @@ out2: out1: free_netdev(dev); out: - iounmap((void *)ioaddr); + iounmap((void __iomem *)ioaddr); return ERR_PTR(err); } diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c index 91c76544e4dd..b865db363ba0 100644 --- a/drivers/net/sun3lance.c +++ b/drivers/net/sun3lance.c @@ -286,7 +286,7 @@ struct net_device * __init sun3lance_probe(int unit) out1: #ifdef CONFIG_SUN3 - iounmap((void *)dev->base_addr); + iounmap((void __iomem *)dev->base_addr); #endif out: free_netdev(dev); @@ -326,7 +326,7 @@ static int __init lance_probe( struct net_device *dev) ioaddr_probe[1] = tmp2; #ifdef CONFIG_SUN3 - iounmap((void *)ioaddr); + iounmap((void __iomem *)ioaddr); #endif return 0; } @@ -956,7 +956,7 @@ void cleanup_module(void) { unregister_netdev(sun3lance_dev); #ifdef CONFIG_SUN3 - iounmap((void *)sun3lance_dev->base_addr); + iounmap((void __iomem *)sun3lance_dev->base_addr); #endif free_netdev(sun3lance_dev); } diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index 45d07faf7b96..9d7cd130c19d 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -2095,8 +2095,8 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) static irqreturn_t happy_meal_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; - struct happy_meal *hp = dev->priv; + struct net_device *dev = dev_id; + struct happy_meal *hp = netdev_priv(dev); u32 happy_status = hme_read32(hp, hp->gregs + GREG_STAT); HMD(("happy_meal_interrupt: status=%08x ", happy_status)); diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c index 9207e19cac34..5b00d79b5573 100644 --- a/drivers/net/sunlance.c +++ b/drivers/net/sunlance.c @@ -822,7 +822,7 @@ out: static irqreturn_t lance_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *)dev_id; + struct net_device *dev = dev_id; struct lance_private *lp = netdev_priv(dev); int csr0; diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c index 020e78170595..7874eb1ef043 100644 --- a/drivers/net/sunqe.c +++ b/drivers/net/sunqe.c @@ -468,7 +468,7 @@ static void qe_tx_reclaim(struct sunqe *qep); */ static irqreturn_t qec_interrupt(int irq, void *dev_id) { - struct sunqec *qecp = (struct sunqec *) dev_id; + struct sunqec *qecp = dev_id; u32 qec_status; int channel = 0; diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c index 9bd4cba87872..46dabdb12071 100644 --- a/drivers/net/tokenring/smctr.c +++ b/drivers/net/tokenring/smctr.c @@ -1990,15 +1990,8 @@ static irqreturn_t smctr_interrupt(int irq, void *dev_id) __u8 isb_type, isb_subtype; __u16 isb_index; - if(dev == NULL) - { - printk(KERN_CRIT "%s: irq %d for unknown device.\n", dev->name, irq); - return IRQ_NONE; - } - ioaddr = dev->base_addr; tp = netdev_priv(dev); - if(tp->status == NOT_INITIALIZED) return IRQ_NONE; diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c index c0ab6e44eb1f..ea797ca2b988 100644 --- a/drivers/net/tokenring/tms380tr.c +++ b/drivers/net/tokenring/tms380tr.c @@ -751,11 +751,6 @@ irqreturn_t tms380tr_interrupt(int irq, void *dev_id) unsigned short irq_type; int handled = 0; - if(dev == NULL) { - printk(KERN_INFO "%s: irq %d for unknown device.\n", dev->name, irq); - return IRQ_NONE; - } - tp = netdev_priv(dev); irq_type = SIFREADW(SIFSTS); diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 2cfd9634895a..f6b3a94e97bf 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -1730,7 +1730,7 @@ static void __init de21040_get_media_info(struct de_private *de) } /* Note: this routine returns extra data bits for size detection. */ -static unsigned __init tulip_read_eeprom(void __iomem *regs, int location, int addr_len) +static unsigned __devinit tulip_read_eeprom(void __iomem *regs, int location, int addr_len) { int i; unsigned retval = 0; @@ -1926,7 +1926,7 @@ bad_srom: goto fill_defaults; } -static int __init de_init_one (struct pci_dev *pdev, +static int __devinit de_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *dev; @@ -2082,7 +2082,7 @@ err_out_free: return rc; } -static void __exit de_remove_one (struct pci_dev *pdev) +static void __devexit de_remove_one (struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct de_private *de = dev->priv; @@ -2164,7 +2164,7 @@ static struct pci_driver de_driver = { .name = DRV_NAME, .id_table = de_pci_tbl, .probe = de_init_one, - .remove = __exit_p(de_remove_one), + .remove = __devexit_p(de_remove_one), #ifdef CONFIG_PM .suspend = de_suspend, .resume = de_resume, diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index e17f9779ead2..3f4b6408b755 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -1540,16 +1540,12 @@ de4x5_queue_pkt(struct sk_buff *skb, struct net_device *dev) static irqreturn_t de4x5_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *)dev_id; + struct net_device *dev = dev_id; struct de4x5_private *lp; s32 imr, omr, sts, limit; u_long iobase; unsigned int handled = 0; - if (dev == NULL) { - printk ("de4x5_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } lp = netdev_priv(dev); spin_lock(&lp->lock); iobase = dev->base_addr; diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c index 12363e056b63..6e5f1c898517 100644 --- a/drivers/net/wan/cycx_main.c +++ b/drivers/net/wan/cycx_main.c @@ -303,9 +303,9 @@ out: return ret; */ static irqreturn_t cycx_isr(int irq, void *dev_id) { - struct cycx_device *card = (struct cycx_device *)dev_id; + struct cycx_device *card = dev_id; - if (!card || card->wandev.state == WAN_UNCONFIGURED) + if (card->wandev.state == WAN_UNCONFIGURED) goto out; if (card->in_isr) { diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 5715d25271f1..6a485f0556f4 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -875,13 +875,7 @@ static irqreturn_t sdla_isr(int irq, void *dev_id) dev = dev_id; - if (dev == NULL) - { - printk(KERN_WARNING "sdla_isr(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } - - flp = dev->priv; + flp = netdev_priv(dev); if (!flp->initialized) { diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c index 793da5f69344..b779c7dcc1a8 100644 --- a/drivers/net/wireless/orinoco.c +++ b/drivers/net/wireless/orinoco.c @@ -1954,7 +1954,7 @@ static void __orinoco_ev_wterr(struct net_device *dev, hermes_t *hw) irqreturn_t orinoco_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *)dev_id; + struct net_device *dev = dev_id; struct orinoco_private *priv = netdev_priv(dev); hermes_t *hw = &priv->hw; int count = MAX_IRQLOOPS_PER_IRQ; diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index cadfe132db84..aafb301041b1 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -4119,21 +4119,12 @@ static irqreturn_t wavelan_interrupt(int irq, void * dev_id) { - struct net_device * dev; + struct net_device * dev = dev_id; net_local * lp; kio_addr_t base; int status0; u_int tx_status; - if ((dev = dev_id) == NULL) - { -#ifdef DEBUG_INTERRUPT_ERROR - printk(KERN_WARNING "wavelan_interrupt(): irq %d for unknown device.\n", - irq); -#endif - return IRQ_NONE; - } - #ifdef DEBUG_INTERRUPT_TRACE printk(KERN_DEBUG "%s: ->wavelan_interrupt()\n", dev->name); #endif diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index a1430352169b..5b98a7876982 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1155,25 +1155,18 @@ static inline void wl3501_ack_interrupt(struct wl3501_card *this) */ static irqreturn_t wl3501_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *)dev_id; + struct net_device *dev = dev_id; struct wl3501_card *this; - int handled = 1; - if (!dev) - goto unknown; - this = dev->priv; + this = netdev_priv(dev); spin_lock(&this->lock); wl3501_ack_interrupt(this); wl3501_block_interrupt(this); wl3501_rx_interrupt(dev); wl3501_unblock_interrupt(this); spin_unlock(&this->lock); -out: - return IRQ_RETVAL(handled); -unknown: - handled = 0; - printk(KERN_ERR "%s: irq %d for unknown device.\n", __FUNCTION__, irq); - goto out; + + return IRQ_HANDLED; } static int wl3501_reset_board(struct wl3501_card *this) diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index ac600212d9a9..2412ce4917f2 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -896,13 +896,6 @@ static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance) int boguscnt = max_interrupt_work; unsigned int handled = 0; -#ifndef final_version /* Can never occur. */ - if (dev == NULL) { - printk (KERN_ERR "yellowfin_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } -#endif - yp = netdev_priv(dev); ioaddr = yp->base; diff --git a/drivers/net/znet.c b/drivers/net/znet.c index 2068a109a86c..b24b0727108c 100644 --- a/drivers/net/znet.c +++ b/drivers/net/znet.c @@ -610,11 +610,6 @@ static irqreturn_t znet_interrupt(int irq, void *dev_id) int boguscnt = 20; int handled = 0; - if (dev == NULL) { - printk(KERN_WARNING "znet_interrupt(): IRQ %d for unknown device.\n", irq); - return IRQ_NONE; - } - spin_lock (&znet->lock); ioaddr = dev->base_addr; diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index a0a8fd8d2124..03c763c2d0e0 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -389,7 +389,7 @@ ilr_again: int irq = dino_dev->global_irq[local_irq]; DBG(KERN_DEBUG "%s(%d, %p) mask 0x%x\n", __FUNCTION__, irq, intr_dev, mask); - __do_IRQ(irq, regs); + __do_IRQ(irq); mask &= ~(1 << local_irq); } while (mask); diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c index 094562e044f3..e97cecbc4d18 100644 --- a/drivers/parisc/eisa.c +++ b/drivers/parisc/eisa.c @@ -234,7 +234,7 @@ static irqreturn_t eisa_irq(int wax_irq, void *intr_dev) } spin_unlock_irqrestore(&eisa_irq_lock, flags); - __do_IRQ(irq, regs); + __do_IRQ(irq); spin_lock_irqsave(&eisa_irq_lock, flags); /* unmask */ diff --git a/drivers/parport/parport_mfc3.c b/drivers/parport/parport_mfc3.c index 6541cde4df00..e5b0a544de40 100644 --- a/drivers/parport/parport_mfc3.c +++ b/drivers/parport/parport_mfc3.c @@ -219,7 +219,7 @@ static irqreturn_t mfc3_interrupt(int irq, void *dev_id) if (this_port[i] != NULL) if (pia(this_port[i])->crb & 128) { /* Board caused interrupt */ dummy = pia(this_port[i])->pprb; /* clear irq bit */ - parport_generic_irq(irq, this_port[i], regs); + parport_generic_irq(irq, this_port[i]); } return IRQ_HANDLED; } diff --git a/drivers/parport/parport_sunbpp.c b/drivers/parport/parport_sunbpp.c index d758c90c86af..9793533276ec 100644 --- a/drivers/parport/parport_sunbpp.c +++ b/drivers/parport/parport_sunbpp.c @@ -48,7 +48,7 @@ static irqreturn_t parport_sunbpp_interrupt(int irq, void *dev_id) { - parport_generic_irq(irq, (struct parport *) dev_id, regs); + parport_generic_irq(irq, (struct parport *) dev_id); return IRQ_HANDLED; } diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 30294127a0aa..ecc50db8585a 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -55,7 +55,7 @@ config PCI_DEBUG config HT_IRQ bool "Interrupts on hypertransport devices" default y - depends on X86_LOCAL_APIC && X86_IO_APIC + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC help This allows native hypertransport devices to use interrupts. diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c index 991e084db2d6..7f5df9a9f393 100644 --- a/drivers/pcmcia/at91_cf.c +++ b/drivers/pcmcia/at91_cf.c @@ -66,7 +66,7 @@ static int at91_cf_ss_init(struct pcmcia_socket *s) static irqreturn_t at91_cf_irq(int irq, void *_cf) { - struct at91_cf_socket *cf = (struct at91_cf_socket *) _cf; + struct at91_cf_socket *cf = _cf; if (irq == cf->board->det_pin) { unsigned present = at91_cf_present(cf); diff --git a/drivers/pcmcia/hd64465_ss.c b/drivers/pcmcia/hd64465_ss.c index db3c26b5de14..caca0dc9d30f 100644 --- a/drivers/pcmcia/hd64465_ss.c +++ b/drivers/pcmcia/hd64465_ss.c @@ -650,7 +650,7 @@ static int hs_set_mem_map(struct pcmcia_socket *s, struct pccard_mem_map *mem) */ static int hs_irq_demux(int irq, void *dev) { - hs_socket_t *sp = (hs_socket_t *)dev; + hs_socket_t *sp = dev; u_int cscr; DPRINTK("hs_irq_demux(irq=%d)\n", irq); @@ -673,11 +673,10 @@ static int hs_irq_demux(int irq, void *dev) static irqreturn_t hs_interrupt(int irq, void *dev) { - hs_socket_t *sp = (hs_socket_t *)dev; + hs_socket_t *sp = dev; u_int events = 0; u_int cscr; - - + cscr = hs_in(sp, CSCR); DPRINTK("hs_interrupt, cscr=%04x\n", cscr); diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c index 0964fd76bfe3..bbf025874d0c 100644 --- a/drivers/pcmcia/m32r_pcc.c +++ b/drivers/pcmcia/m32r_pcc.c @@ -395,7 +395,7 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) static void pcc_interrupt_wrapper(u_long data) { - pcc_interrupt(0, NULL, NULL); + pcc_interrupt(0, NULL); init_timer(&poll_timer); poll_timer.expires = jiffies + poll_interval; add_timer(&poll_timer); diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 222a8a71a5e8..53db58a68617 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -218,7 +218,7 @@ dasd_diag_term_IO(struct dasd_ccw_req * cqr) /* Handle external interruption. */ static void -dasd_ext_handler(struct pt_regs *regs, __u16 code) +dasd_ext_handler(__u16 code) { struct dasd_ccw_req *cqr, *next; struct dasd_device *device; diff --git a/drivers/s390/char/ctrlchar.c b/drivers/s390/char/ctrlchar.c index d83eb6358bac..49e9628d9297 100644 --- a/drivers/s390/char/ctrlchar.c +++ b/drivers/s390/char/ctrlchar.c @@ -20,7 +20,7 @@ static int ctrlchar_sysrq_key; static void ctrlchar_handle_sysrq(void *tty) { - handle_sysrq(ctrlchar_sysrq_key, NULL, (struct tty_struct *) tty); + handle_sysrq(ctrlchar_sysrq_key, (struct tty_struct *) tty); } static DECLARE_WORK(ctrlchar_work, ctrlchar_handle_sysrq, NULL); diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 3be06569180d..e3491a5f5219 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -304,7 +304,7 @@ kbd_keycode(struct kbd_data *kbd, unsigned int keycode) if (kbd->sysrq) { if (kbd->sysrq == K(KT_LATIN, '-')) { kbd->sysrq = 0; - handle_sysrq(value, NULL, kbd->tty); + handle_sysrq(value, kbd->tty); return; } if (value == '-') { diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 1e3939aeb8ab..abd02ed501cb 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -26,6 +26,7 @@ #define MONWRITE_MAX_DATALEN 4024 static int mon_max_bufs = 255; +static int mon_buf_count; struct mon_buf { struct list_head list; @@ -40,7 +41,6 @@ struct mon_private { size_t hdr_to_read; size_t data_to_read; struct mon_buf *current_buf; - int mon_buf_count; }; /* @@ -99,18 +99,18 @@ static int monwrite_new_hdr(struct mon_private *monpriv) rc = monwrite_diag(monhdr, monbuf->data, APPLDATA_STOP_REC); list_del(&monbuf->list); - monpriv->mon_buf_count--; + mon_buf_count--; kfree(monbuf->data); kfree(monbuf); monbuf = NULL; } } else { - if (monpriv->mon_buf_count >= mon_max_bufs) + if (mon_buf_count >= mon_max_bufs) return -ENOSPC; monbuf = kzalloc(sizeof(struct mon_buf), GFP_KERNEL); if (!monbuf) return -ENOMEM; - monbuf->data = kzalloc(monbuf->hdr.datalen, + monbuf->data = kzalloc(monhdr->datalen, GFP_KERNEL | GFP_DMA); if (!monbuf->data) { kfree(monbuf); @@ -118,7 +118,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) } monbuf->hdr = *monhdr; list_add_tail(&monbuf->list, &monpriv->list); - monpriv->mon_buf_count++; + mon_buf_count++; } monpriv->current_buf = monbuf; return 0; @@ -186,7 +186,7 @@ static int monwrite_close(struct inode *inode, struct file *filp) if (entry->hdr.mon_function != MONWRITE_GEN_EVENT) monwrite_diag(&entry->hdr, entry->data, APPLDATA_STOP_REC); - monpriv->mon_buf_count--; + mon_buf_count--; list_del(&entry->list); kfree(entry->data); kfree(entry); diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 31e335751d6d..8a056df09d6b 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -324,7 +324,7 @@ __sclp_find_req(u32 sccb) * Prepare read event data request if necessary. Start processing of next * request on queue. */ static void -sclp_interrupt_handler(struct pt_regs *regs, __u16 code) +sclp_interrupt_handler(__u16 code) { struct sclp_req *req; u32 finished_sccb; @@ -743,7 +743,7 @@ EXPORT_SYMBOL(sclp_reactivate); /* Handler for external interruption used during initialization. Modify * request state to done. */ static void -sclp_check_handler(struct pt_regs *regs, __u16 code) +sclp_check_handler(__u16 code) { u32 finished_sccb; diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 3bb4e472d73d..2d78f0f4a40f 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -200,11 +200,13 @@ css_get_ssd_info(struct subchannel *sch) spin_unlock_irq(&sch->lock); free_page((unsigned long)page); if (!ret) { - int j, chpid; + int j, chpid, mask; /* Allocate channel path structures, if needed. */ for (j = 0; j < 8; j++) { + mask = 0x80 >> j; chpid = sch->ssd_info.chpid[j]; - if (chpid && (get_chp_status(chpid) < 0)) + if ((sch->schib.pmcw.pim & mask) && + (get_chp_status(chpid) < 0)) new_channel_path(chpid); } } @@ -222,13 +224,15 @@ s390_subchannel_remove_chpid(struct device *dev, void *data) sch = to_subchannel(dev); chpid = data; - for (j = 0; j < 8; j++) - if (sch->schib.pmcw.chpid[j] == chpid->id) + for (j = 0; j < 8; j++) { + mask = 0x80 >> j; + if ((sch->schib.pmcw.pim & mask) && + (sch->schib.pmcw.chpid[j] == chpid->id)) break; + } if (j >= 8) return 0; - mask = 0x80 >> j; spin_lock_irq(&sch->lock); stsch(sch->schid, &schib); @@ -366,7 +370,7 @@ __s390_process_res_acc(struct subchannel_id schid, void *data) struct res_acc_data *res_data; struct subchannel *sch; - res_data = (struct res_acc_data *)data; + res_data = data; sch = get_subchannel_by_schid(schid); if (!sch) /* Check if a subchannel is newly available. */ @@ -440,7 +444,7 @@ __get_chpid_from_lir(void *data) u32 isinfo[28]; } *lir; - lir = (struct lir*) data; + lir = data; if (!(lir->iq&0x80)) /* NULL link incident record */ return -EINVAL; @@ -620,18 +624,20 @@ __chp_add_new_sch(struct subchannel_id schid) static int __chp_add(struct subchannel_id schid, void *data) { - int i; + int i, mask; struct channel_path *chp; struct subchannel *sch; - chp = (struct channel_path *)data; + chp = data; sch = get_subchannel_by_schid(schid); if (!sch) /* Check if the subchannel is now available. */ return __chp_add_new_sch(schid); spin_lock_irq(&sch->lock); - for (i=0; i<8; i++) - if (sch->schib.pmcw.chpid[i] == chp->id) { + for (i=0; i<8; i++) { + mask = 0x80 >> i; + if ((sch->schib.pmcw.pim & mask) && + (sch->schib.pmcw.chpid[i] == chp->id)) { if (stsch(sch->schid, &sch->schib) != 0) { /* Endgame. */ spin_unlock_irq(&sch->lock); @@ -639,6 +645,7 @@ __chp_add(struct subchannel_id schid, void *data) } break; } + } if (i==8) { spin_unlock_irq(&sch->lock); return 0; @@ -646,7 +653,7 @@ __chp_add(struct subchannel_id schid, void *data) sch->lpm = ((sch->schib.pmcw.pim & sch->schib.pmcw.pam & sch->schib.pmcw.pom) - | 0x80 >> i) & sch->opm; + | mask) & sch->opm; if (sch->driver && sch->driver->verify) sch->driver->verify(&sch->dev); @@ -700,8 +707,7 @@ chp_process_crw(int chpid, int on) return chp_add(chpid); } -static inline int -__check_for_io_and_kill(struct subchannel *sch, int index) +static inline int check_for_io_on_path(struct subchannel *sch, int index) { int cc; @@ -711,10 +717,8 @@ __check_for_io_and_kill(struct subchannel *sch, int index) cc = stsch(sch->schid, &sch->schib); if (cc) return 0; - if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) { - device_set_waiting(sch); + if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) return 1; - } return 0; } @@ -743,12 +747,10 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) } else { sch->opm &= ~(0x80 >> chp); sch->lpm &= ~(0x80 >> chp); - /* - * Give running I/O a grace period in which it - * can successfully terminate, even using the - * just varied off path. Then kill it. - */ - if (!__check_for_io_and_kill(sch, chp) && !sch->lpm) { + if (check_for_io_on_path(sch, chp)) + /* Path verification is done after killing. */ + device_kill_io(sch); + else if (!sch->lpm) { if (css_enqueue_subchannel_slow(sch->schid)) { css_clear_subchannel_slow_list(); need_rescan = 1; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 2e2882daefbb..8936e460a807 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -19,6 +19,7 @@ #include <asm/cio.h> #include <asm/delay.h> #include <asm/irq.h> +#include <asm/irq_regs.h> #include <asm/setup.h> #include "airq.h" #include "cio.h" @@ -606,15 +607,17 @@ do_IRQ (struct pt_regs *regs) struct tpi_info *tpi_info; struct subchannel *sch; struct irb *irb; + struct pt_regs *old_regs; - irq_enter (); + old_regs = set_irq_regs(regs); + irq_enter(); asm volatile ("mc 0,0"); if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) /** * Make sure that the i/o interrupt did not "overtake" * the last HZ timer interrupt. */ - account_ticks(regs); + account_ticks(); /* * Get interrupt information from lowcore */ @@ -652,7 +655,8 @@ do_IRQ (struct pt_regs *regs) * out of the sie which costs more cycles than it saves. */ } while (!MACHINE_IS_VM && tpi (NULL) != 0); - irq_exit (); + irq_exit(); + set_irq_regs(old_regs); } #ifdef CONFIG_CCW_CONSOLE diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 7086a74e9871..a2dee5bf5a17 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -177,7 +177,7 @@ get_subchannel_by_schid(struct subchannel_id schid) struct device *dev; dev = bus_find_device(&css_bus_type, NULL, - (void *)&schid, check_subchannel); + &schid, check_subchannel); return dev ? to_subchannel(dev) : NULL; } diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 8aabb4adeb5f..4c2ff8336288 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -76,9 +76,8 @@ struct ccw_device_private { int state; /* device state */ atomic_t onoff; unsigned long registered; - __u16 devno; /* device number */ - __u16 sch_no; /* subchannel number */ - __u8 ssid; /* subchannel set id */ + struct ccw_dev_id dev_id; /* device id */ + struct subchannel_id schid; /* subchannel number */ __u8 imask; /* lpm mask for SNID/SID/SPGID */ int iretry; /* retry counter SNID/SID/SPGID */ struct { @@ -171,7 +170,7 @@ void device_trigger_reprobe(struct subchannel *); /* Helper functions for vary on/off. */ int device_is_online(struct subchannel *); -void device_set_waiting(struct subchannel *); +void device_kill_io(struct subchannel *); /* Machine check helper function. */ void device_kill_pending_timer(struct subchannel *); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 688945662c15..94bdd4d8a4c9 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -552,21 +552,19 @@ ccw_device_register(struct ccw_device *cdev) } struct match_data { - unsigned int devno; - unsigned int ssid; + struct ccw_dev_id dev_id; struct ccw_device * sibling; }; static int match_devno(struct device * dev, void * data) { - struct match_data * d = (struct match_data *)data; + struct match_data * d = data; struct ccw_device * cdev; cdev = to_ccwdev(dev); if ((cdev->private->state == DEV_STATE_DISCONNECTED) && - (cdev->private->devno == d->devno) && - (cdev->private->ssid == d->ssid) && + ccw_dev_id_is_equal(&cdev->private->dev_id, &d->dev_id) && (cdev != d->sibling)) { cdev->private->state = DEV_STATE_NOT_OPER; return 1; @@ -574,15 +572,13 @@ match_devno(struct device * dev, void * data) return 0; } -static struct ccw_device * -get_disc_ccwdev_by_devno(unsigned int devno, unsigned int ssid, - struct ccw_device *sibling) +static struct ccw_device * get_disc_ccwdev_by_dev_id(struct ccw_dev_id *dev_id, + struct ccw_device *sibling) { struct device *dev; struct match_data data; - data.devno = devno; - data.ssid = ssid; + data.dev_id = *dev_id; data.sibling = sibling; dev = bus_find_device(&ccw_bus_type, NULL, &data, match_devno); @@ -595,7 +591,7 @@ ccw_device_add_changed(void *data) struct ccw_device *cdev; - cdev = (struct ccw_device *)data; + cdev = data; if (device_add(&cdev->dev)) { put_device(&cdev->dev); return; @@ -616,9 +612,9 @@ ccw_device_do_unreg_rereg(void *data) struct subchannel *sch; int need_rename; - cdev = (struct ccw_device *)data; + cdev = data; sch = to_subchannel(cdev->dev.parent); - if (cdev->private->devno != sch->schib.pmcw.dev) { + if (cdev->private->dev_id.devno != sch->schib.pmcw.dev) { /* * The device number has changed. This is usually only when * a device has been detached under VM and then re-appeared @@ -633,10 +629,12 @@ ccw_device_do_unreg_rereg(void *data) * get possibly sick... */ struct ccw_device *other_cdev; + struct ccw_dev_id dev_id; need_rename = 1; - other_cdev = get_disc_ccwdev_by_devno(sch->schib.pmcw.dev, - sch->schid.ssid, cdev); + dev_id.devno = sch->schib.pmcw.dev; + dev_id.ssid = sch->schid.ssid; + other_cdev = get_disc_ccwdev_by_dev_id(&dev_id, cdev); if (other_cdev) { struct subchannel *other_sch; @@ -652,7 +650,7 @@ ccw_device_do_unreg_rereg(void *data) } /* Update ssd info here. */ css_get_ssd_info(sch); - cdev->private->devno = sch->schib.pmcw.dev; + cdev->private->dev_id.devno = sch->schib.pmcw.dev; } else need_rename = 0; device_remove_files(&cdev->dev); @@ -662,7 +660,7 @@ ccw_device_do_unreg_rereg(void *data) snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x", sch->schid.ssid, sch->schib.pmcw.dev); PREPARE_WORK(&cdev->private->kick_work, - ccw_device_add_changed, (void *)cdev); + ccw_device_add_changed, cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } @@ -687,7 +685,7 @@ io_subchannel_register(void *data) int ret; unsigned long flags; - cdev = (struct ccw_device *) data; + cdev = data; sch = to_subchannel(cdev->dev.parent); if (klist_node_attached(&cdev->dev.knode_parent)) { @@ -759,7 +757,7 @@ io_subchannel_recog_done(struct ccw_device *cdev) break; sch = to_subchannel(cdev->dev.parent); PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, (void *) cdev); + ccw_device_call_sch_unregister, cdev); queue_work(slow_path_wq, &cdev->private->kick_work); if (atomic_dec_and_test(&ccw_device_init_count)) wake_up(&ccw_device_init_wq); @@ -774,7 +772,7 @@ io_subchannel_recog_done(struct ccw_device *cdev) if (!get_device(&cdev->dev)) break; PREPARE_WORK(&cdev->private->kick_work, - io_subchannel_register, (void *) cdev); + io_subchannel_register, cdev); queue_work(slow_path_wq, &cdev->private->kick_work); break; } @@ -792,9 +790,9 @@ io_subchannel_recog(struct ccw_device *cdev, struct subchannel *sch) /* Init private data. */ priv = cdev->private; - priv->devno = sch->schib.pmcw.dev; - priv->ssid = sch->schid.ssid; - priv->sch_no = sch->schid.sch_no; + priv->dev_id.devno = sch->schib.pmcw.dev; + priv->dev_id.ssid = sch->schid.ssid; + priv->schid = sch->schid; priv->state = DEV_STATE_NOT_OPER; INIT_LIST_HEAD(&priv->cmb_list); init_waitqueue_head(&priv->wait_q); @@ -912,7 +910,7 @@ io_subchannel_remove (struct subchannel *sch) */ if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_unregister, (void *) cdev); + ccw_device_unregister, cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } return 0; @@ -1055,7 +1053,7 @@ __ccwdev_check_busid(struct device *dev, void *id) { char *bus_id; - bus_id = (char *)id; + bus_id = id; return (strncmp(bus_id, dev->bus_id, BUS_ID_SIZE) == 0); } diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 00be9a5b4acd..c6140cc97a80 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -21,7 +21,6 @@ enum dev_state { /* states to wait for i/o completion before doing something */ DEV_STATE_CLEAR_VERIFY, DEV_STATE_TIMEOUT_KILL, - DEV_STATE_WAIT4IO, DEV_STATE_QUIESCE, /* special states for devices gone not operational */ DEV_STATE_DISCONNECTED, diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index b67620208f36..fcaf28d7b4eb 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -59,18 +59,6 @@ device_set_disconnected(struct subchannel *sch) cdev->private->state = DEV_STATE_DISCONNECTED; } -void -device_set_waiting(struct subchannel *sch) -{ - struct ccw_device *cdev; - - if (!sch->dev.driver_data) - return; - cdev = sch->dev.driver_data; - ccw_device_set_timeout(cdev, 10*HZ); - cdev->private->state = DEV_STATE_WAIT4IO; -} - /* * Timeout function. It just triggers a DEV_EVENT_TIMEOUT. */ @@ -183,9 +171,9 @@ ccw_device_handle_oper(struct ccw_device *cdev) cdev->id.cu_model != cdev->private->senseid.cu_model || cdev->id.dev_type != cdev->private->senseid.dev_type || cdev->id.dev_model != cdev->private->senseid.dev_model || - cdev->private->devno != sch->schib.pmcw.dev) { + cdev->private->dev_id.devno != sch->schib.pmcw.dev) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_do_unreg_rereg, (void *)cdev); + ccw_device_do_unreg_rereg, cdev); queue_work(ccw_device_work, &cdev->private->kick_work); return 0; } @@ -255,7 +243,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) case DEV_STATE_NOT_OPER: CIO_DEBUG(KERN_WARNING, 2, "SenseID : unknown device %04x on subchannel " - "0.%x.%04x\n", cdev->private->devno, + "0.%x.%04x\n", cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); break; case DEV_STATE_OFFLINE: @@ -282,14 +270,15 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) CIO_DEBUG(KERN_INFO, 2, "SenseID : device 0.%x.%04x reports: " "CU Type/Mod = %04X/%02X, Dev Type/Mod = " "%04X/%02X\n", - cdev->private->ssid, cdev->private->devno, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, cdev->id.cu_type, cdev->id.cu_model, cdev->id.dev_type, cdev->id.dev_model); break; case DEV_STATE_BOXED: CIO_DEBUG(KERN_WARNING, 2, "SenseID : boxed device %04x on subchannel " - "0.%x.%04x\n", cdev->private->devno, + "0.%x.%04x\n", cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); break; } @@ -325,13 +314,13 @@ ccw_device_oper_notify(void *data) struct subchannel *sch; int ret; - cdev = (struct ccw_device *)data; + cdev = data; sch = to_subchannel(cdev->dev.parent); ret = (sch->driver && sch->driver->notify) ? sch->driver->notify(&sch->dev, CIO_OPER) : 0; if (!ret) /* Driver doesn't want device back. */ - ccw_device_do_unreg_rereg((void *)cdev); + ccw_device_do_unreg_rereg(cdev); else { /* Reenable channel measurements, if needed. */ cmf_reenable(cdev); @@ -363,12 +352,12 @@ ccw_device_done(struct ccw_device *cdev, int state) if (state == DEV_STATE_BOXED) CIO_DEBUG(KERN_WARNING, 2, "Boxed device %04x on subchannel %04x\n", - cdev->private->devno, sch->schid.sch_no); + cdev->private->dev_id.devno, sch->schid.sch_no); if (cdev->private->flags.donotify) { cdev->private->flags.donotify = 0; PREPARE_WORK(&cdev->private->kick_work, ccw_device_oper_notify, - (void *)cdev); + cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } wake_up(&cdev->private->wait_q); @@ -412,7 +401,8 @@ static void __ccw_device_get_common_pgid(struct ccw_device *cdev) /* PGID mismatch, can't pathgroup. */ CIO_MSG_EVENT(0, "SNID - pgid mismatch for device " "0.%x.%04x, can't pathgroup\n", - cdev->private->ssid, cdev->private->devno); + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno); cdev->private->options.pgroup = 0; return; } @@ -523,7 +513,7 @@ ccw_device_nopath_notify(void *data) struct subchannel *sch; int ret; - cdev = (struct ccw_device *)data; + cdev = data; sch = to_subchannel(cdev->dev.parent); /* Extra sanity. */ if (sch->lpm) @@ -537,7 +527,7 @@ ccw_device_nopath_notify(void *data) if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, ccw_device_call_sch_unregister, - (void *)cdev); + cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } else @@ -592,7 +582,7 @@ ccw_device_verify_done(struct ccw_device *cdev, int err) break; default: PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); + ccw_device_nopath_notify, cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); ccw_device_done(cdev, DEV_STATE_NOT_OPER); break; @@ -723,7 +713,7 @@ ccw_device_offline_notoper(struct ccw_device *cdev, enum dev_event dev_event) sch = to_subchannel(cdev->dev.parent); if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, (void *)cdev); + ccw_device_call_sch_unregister, cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } wake_up(&cdev->private->wait_q); @@ -754,7 +744,7 @@ ccw_device_online_notoper(struct ccw_device *cdev, enum dev_event dev_event) } if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, (void *)cdev); + ccw_device_call_sch_unregister, cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } wake_up(&cdev->private->wait_q); @@ -859,7 +849,7 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event) sch = to_subchannel(cdev->dev.parent); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); + ccw_device_nopath_notify, cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else @@ -885,7 +875,8 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event) /* Basic sense hasn't started. Try again. */ ccw_device_do_sense(cdev, irb); else { - printk("Huh? %s(%s): unsolicited interrupt...\n", + printk(KERN_INFO "Huh? %s(%s): unsolicited " + "interrupt...\n", __FUNCTION__, cdev->dev.bus_id); if (cdev->handler) cdev->handler (cdev, 0, irb); @@ -944,10 +935,10 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) cdev->private->state = DEV_STATE_ONLINE; if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-ETIMEDOUT)); + ERR_PTR(-EIO)); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); + ccw_device_nopath_notify, cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else if (cdev->private->flags.doverify) /* Start delayed path verification. */ @@ -970,7 +961,7 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event) sch = to_subchannel(cdev->dev.parent); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); + ccw_device_nopath_notify, cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else @@ -981,51 +972,15 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event) cdev->private->state = DEV_STATE_ONLINE; if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-ETIMEDOUT)); -} - -static void -ccw_device_wait4io_irq(struct ccw_device *cdev, enum dev_event dev_event) -{ - struct irb *irb; - struct subchannel *sch; - - irb = (struct irb *) __LC_IRB; - /* - * Accumulate status and find out if a basic sense is needed. - * This is fine since we have already adapted the lpm. - */ - ccw_device_accumulate_irb(cdev, irb); - if (cdev->private->flags.dosense) { - if (ccw_device_do_sense(cdev, irb) == 0) { - cdev->private->state = DEV_STATE_W4SENSE; - } - return; - } - - /* Iff device is idle, reset timeout. */ - sch = to_subchannel(cdev->dev.parent); - if (!stsch(sch->schid, &sch->schib)) - if (sch->schib.scsw.actl == 0) - ccw_device_set_timeout(cdev, 0); - /* Call the handler. */ - ccw_device_call_handler(cdev); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, &cdev->private->kick_work); - } else if (cdev->private->flags.doverify) - ccw_device_online_verify(cdev, 0); + ERR_PTR(-EIO)); } -static void -ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event) +void device_kill_io(struct subchannel *sch) { int ret; - struct subchannel *sch; + struct ccw_device *cdev; - sch = to_subchannel(cdev->dev.parent); - ccw_device_set_timeout(cdev, 0); + cdev = sch->dev.driver_data; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); @@ -1035,7 +990,7 @@ ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event) if (ret == -ENODEV) { if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); + ccw_device_nopath_notify, cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else @@ -1044,12 +999,12 @@ ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event) } if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-ETIMEDOUT)); + ERR_PTR(-EIO)); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); + ccw_device_nopath_notify, cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); - } else if (cdev->private->flags.doverify) + } else /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); } @@ -1286,12 +1241,6 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_TIMEOUT] = ccw_device_killing_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, //FIXME }, - [DEV_STATE_WAIT4IO] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, - [DEV_EVENT_INTERRUPT] = ccw_device_wait4io_irq, - [DEV_EVENT_TIMEOUT] = ccw_device_wait4io_timeout, - [DEV_EVENT_VERIFY] = ccw_device_delay_verify, - }, [DEV_STATE_QUIESCE] = { [DEV_EVENT_NOTOPER] = ccw_device_quiesce_done, [DEV_EVENT_INTERRUPT] = ccw_device_quiesce_done, diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c index 1398367b5f68..a74785b9e4eb 100644 --- a/drivers/s390/cio/device_id.c +++ b/drivers/s390/cio/device_id.c @@ -251,7 +251,7 @@ ccw_device_check_sense_id(struct ccw_device *cdev) */ CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel " "0.%x.%04x reports cmd reject\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); return -EOPNOTSUPP; } @@ -259,7 +259,8 @@ ccw_device_check_sense_id(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SenseID : UC on dev 0.%x.%04x, " "lpum %02X, cnt %02d, sns :" " %02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->ssid, cdev->private->devno, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -274,14 +275,15 @@ ccw_device_check_sense_id(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x " "on subchannel 0.%x.%04x is " "'not operational'\n", sch->orb.lpm, - cdev->private->devno, sch->schid.ssid, - sch->schid.sch_no); + cdev->private->dev_id.devno, + sch->schid.ssid, sch->schid.sch_no); return -EACCES; } /* Hmm, whatever happened, try again. */ CIO_MSG_EVENT(2, "SenseID : start_IO() for device %04x on " "subchannel 0.%x.%04x returns status %02X%02X\n", - cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, + cdev->private->dev_id.devno, sch->schid.ssid, + sch->schid.sch_no, irb->scsw.dstat, irb->scsw.cstat); return -EAGAIN; } @@ -330,7 +332,7 @@ ccw_device_sense_id_irq(struct ccw_device *cdev, enum dev_event dev_event) /* fall through. */ default: /* Sense ID failed. Try asking VM. */ if (MACHINE_IS_VM) { - VM_virtual_device_info (cdev->private->devno, + VM_virtual_device_info (cdev->private->dev_id.devno, &cdev->private->senseid); if (cdev->private->senseid.cu_type != 0xFFFF) { /* Got the device information from VM. */ diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 84b9b18eabc2..b39c1fa48acd 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -50,7 +50,6 @@ ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; if (cdev->private->state != DEV_STATE_ONLINE && - cdev->private->state != DEV_STATE_WAIT4IO && cdev->private->state != DEV_STATE_W4SENSE) return -EINVAL; sch = to_subchannel(cdev->dev.parent); @@ -155,7 +154,6 @@ ccw_device_halt(struct ccw_device *cdev, unsigned long intparm) if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; if (cdev->private->state != DEV_STATE_ONLINE && - cdev->private->state != DEV_STATE_WAIT4IO && cdev->private->state != DEV_STATE_W4SENSE) return -EINVAL; sch = to_subchannel(cdev->dev.parent); @@ -592,13 +590,13 @@ ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no) int _ccw_device_get_subchannel_number(struct ccw_device *cdev) { - return cdev->private->sch_no; + return cdev->private->schid.sch_no; } int _ccw_device_get_device_number(struct ccw_device *cdev) { - return cdev->private->devno; + return cdev->private->dev_id.devno; } diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index 84917b39de45..2975ce888c19 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -79,7 +79,8 @@ __ccw_device_sense_pgid_start(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not " "operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, + sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); } @@ -135,7 +136,8 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SNID - device 0.%x.%04x, unit check, " "lpum %02X, cnt %02d, sns : " "%02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->ssid, cdev->private->devno, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -147,7 +149,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, sch->orb.lpm); return -EACCES; } @@ -155,7 +157,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) if (cdev->private->pgid[i].inf.ps.state2 == SNID_STATE2_RESVD_ELSE) { CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x " "is reserved by someone else\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); return -EUSERS; } @@ -261,7 +263,7 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func) /* PGID command failed on this path. */ CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return ret; } @@ -301,7 +303,7 @@ static int __ccw_device_do_nop(struct ccw_device *cdev) /* nop command failed on this path. */ CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return ret; } @@ -328,8 +330,9 @@ __ccw_device_check_pgid(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SPID - device 0.%x.%04x, unit check, " "cnt %02d, " "sns : %02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->ssid, - cdev->private->devno, irb->esw.esw0.erw.scnt, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, + irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], irb->ecw[2], irb->ecw[3], irb->ecw[4], irb->ecw[5], @@ -339,7 +342,7 @@ __ccw_device_check_pgid(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return -EACCES; } @@ -362,7 +365,7 @@ static int __ccw_device_check_nop(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return -EACCES; } diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index caf148d5caad..3f7cbce4cd87 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -32,19 +32,18 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK))) return; - CIO_MSG_EVENT(0, "Channel-Check or Interface-Control-Check " "received" " ... device %04x on subchannel 0.%x.%04x, dev_stat " ": %02X sch_stat : %02X\n", - cdev->private->devno, cdev->private->ssid, - cdev->private->sch_no, + cdev->private->dev_id.devno, cdev->private->schid.ssid, + cdev->private->schid.sch_no, irb->scsw.dstat, irb->scsw.cstat); if (irb->scsw.cc != 3) { char dbf_text[15]; - sprintf(dbf_text, "chk%x", cdev->private->sch_no); + sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, irb, sizeof (struct irb)); } diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index cde822d8b5c8..0648ce5bb684 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -1741,7 +1741,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, void *ptr; int available; - sprintf(dbf_text,"qfqs%4x",cdev->private->sch_no); + sprintf(dbf_text,"qfqs%4x",cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); for (i=0;i<no_input_qs;i++) { q=irq_ptr->input_qs[i]; @@ -2924,7 +2924,7 @@ qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat) irq_ptr = cdev->private->qdio_data; - sprintf(dbf_text,"qehi%4x",cdev->private->sch_no); + sprintf(dbf_text,"qehi%4x",cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2943,7 +2943,7 @@ qdio_initialize(struct qdio_initialize *init_data) int rc; char dbf_text[15]; - sprintf(dbf_text,"qini%4x",init_data->cdev->private->sch_no); + sprintf(dbf_text,"qini%4x",init_data->cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2964,7 +2964,7 @@ qdio_allocate(struct qdio_initialize *init_data) struct qdio_irq *irq_ptr; char dbf_text[15]; - sprintf(dbf_text,"qalc%4x",init_data->cdev->private->sch_no); + sprintf(dbf_text,"qalc%4x",init_data->cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) || @@ -3187,7 +3187,7 @@ qdio_establish(struct qdio_initialize *init_data) tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET); } - sprintf(dbf_text,"qest%4x",cdev->private->sch_no); + sprintf(dbf_text,"qest%4x",cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index cd30f37fceae..c5ccd20b110c 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1062,7 +1062,7 @@ static int ap_poll_thread(void *data) unsigned long flags; int requests; - set_user_nice(current, -20); + set_user_nice(current, 19); while (1) { if (need_resched()) { schedule(); diff --git a/drivers/s390/net/iucv.c b/drivers/s390/net/iucv.c index 809dd8d7f47a..1476ce2b437c 100644 --- a/drivers/s390/net/iucv.c +++ b/drivers/s390/net/iucv.c @@ -116,7 +116,7 @@ static DEFINE_SPINLOCK(iucv_irq_queue_lock); *Internal function prototypes */ static void iucv_tasklet_handler(unsigned long); -static void iucv_irq_handler(struct pt_regs *, __u16); +static void iucv_irq_handler(__u16); static DECLARE_TASKLET(iucv_tasklet,iucv_tasklet_handler,0); @@ -2251,7 +2251,7 @@ iucv_sever(__u16 pathid, __u8 user_data[16]) * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler(). */ static void -iucv_irq_handler(struct pt_regs *regs, __u16 code) +iucv_irq_handler(__u16 code) { iucv_irqdata *irqdata; diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 862a411a4aa0..c88babce9bca 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1987,7 +1987,7 @@ zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *erp_action) sbale = &(adapter->response_queue.buffer[i]->element[0]); sbale->length = 0; sbale->flags = SBAL_FLAGS_LAST_ENTRY; - sbale->addr = 0; + sbale->addr = NULL; } ZFCP_LOG_TRACE("calling do_QDIO on adapter %s (flags=0x%x, " diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c index 2f698763ba5d..81ba2d71cee2 100644 --- a/drivers/sbus/char/openprom.c +++ b/drivers/sbus/char/openprom.c @@ -630,7 +630,7 @@ static int openprom_ioctl(struct inode * inode, struct file * file, case OPROMPATH2NODE: if ((file->f_mode & FMODE_READ) == 0) return -EPERM; - return openprom_sunos_ioctl(inode, file, cmd, arg, 0); + return openprom_sunos_ioctl(inode, file, cmd, arg, NULL); case OPIOCGET: case OPIOCNEXTPROP: diff --git a/drivers/sbus/char/uctrl.c b/drivers/sbus/char/uctrl.c index ddc0681ac759..b30372f17f1c 100644 --- a/drivers/sbus/char/uctrl.c +++ b/drivers/sbus/char/uctrl.c @@ -400,7 +400,7 @@ static int __init ts102_uctrl_init(void) } driver->regs->uctrl_intr = UCTRL_INTR_RXNE_REQ|UCTRL_INTR_RXNE_MSK; - printk("uctrl: 0x%x (irq %d)\n", driver->regs, driver->irq); + printk("uctrl: 0x%p (irq %d)\n", driver->regs, driver->irq); uctrl_get_event_status(); uctrl_get_external_status(); return 0; diff --git a/drivers/scsi/NCR53c406a.c b/drivers/scsi/NCR53c406a.c index 3896278594c7..d4613815f685 100644 --- a/drivers/scsi/NCR53c406a.c +++ b/drivers/scsi/NCR53c406a.c @@ -168,7 +168,7 @@ enum Phase { }; /* Static function prototypes */ -static void NCR53c406a_intr(int, void *); +static void NCR53c406a_intr(void *); static irqreturn_t do_NCR53c406a_intr(int, void *); static void chip_init(void); static void calc_port_addr(void); @@ -685,7 +685,7 @@ static void wait_intr(void) return; } - NCR53c406a_intr(0, NULL, NULL); + NCR53c406a_intr(NULL); } #endif @@ -767,12 +767,12 @@ static irqreturn_t do_NCR53c406a_intr(int unused, void *dev_id) struct Scsi_Host *dev = dev_id; spin_lock_irqsave(dev->host_lock, flags); - NCR53c406a_intr(0, dev_id); + NCR53c406a_intr(dev_id); spin_unlock_irqrestore(dev->host_lock, flags); return IRQ_HANDLED; } -static void NCR53c406a_intr(int unused, void *dev_id) +static void NCR53c406a_intr(void *dev_id) { DEB(unsigned char fifo_size; ) diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index 83695416b2c9..2b344356a29e 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -3881,7 +3881,7 @@ typedef struct asc_board { /* * The following fields are used only for Wide Boards. */ - void *ioremap_addr; /* I/O Memory remap address. */ + void __iomem *ioremap_addr; /* I/O Memory remap address. */ ushort ioport; /* I/O Port address. */ ADV_CARR_T *orig_carrp; /* ADV_CARR_T memory block. */ adv_req_t *orig_reqp; /* adv_req_t memory block. */ @@ -3951,7 +3951,7 @@ typedef struct _PCI_CONFIG_SPACE_ /* Number of boards detected in system. */ STATIC int asc_board_count = 0; -STATIC struct Scsi_Host *asc_host[ASC_NUM_BOARD_SUPPORTED] = { 0 }; +STATIC struct Scsi_Host *asc_host[ASC_NUM_BOARD_SUPPORTED] = { NULL }; /* Overrun buffer used by all narrow boards. */ STATIC uchar overrun_buf[ASC_OVERRUN_BSIZE] = { 0 }; @@ -6621,7 +6621,7 @@ adv_build_req(asc_board_t *boardp, struct scsi_cmnd *scp, dma_map_single(dev, scp->request_buffer, scp->request_bufflen, scp->sc_data_direction); } else { - scsiqp->vdata_addr = 0; + scsiqp->vdata_addr = NULL; scp->SCp.dma_handle = 0; } scsiqp->data_addr = cpu_to_le32(scp->SCp.dma_handle); diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index e04c2bc1932b..306f46b85a55 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -238,7 +238,7 @@ #include <linux/module.h> #include <linux/sched.h> #include <asm/irq.h> -#include <asm/io.h> +#include <linux/io.h> #include <linux/blkdev.h> #include <asm/system.h> #include <linux/errno.h> @@ -759,12 +759,7 @@ static inline Scsi_Cmnd *remove_SC(Scsi_Cmnd **SC, Scsi_Cmnd *SCp) static irqreturn_t swintr(int irqno, void *dev_id) { - struct Scsi_Host *shpnt = (struct Scsi_Host *)dev_id; - - if (!shpnt) { - printk(KERN_ERR "aha152x: catched software interrupt %d for unknown controller.\n", irqno); - return IRQ_NONE; - } + struct Scsi_Host *shpnt = dev_id; HOSTDATA(shpnt)->swint++; diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 7f0adf9c4c7e..bcd7fffab907 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -6345,12 +6345,12 @@ aic7xxx_handle_command_completion_intr(struct aic7xxx_host *p) * SCSI controller interrupt handler. *-F*************************************************************************/ static void -aic7xxx_isr(int irq, void *dev_id) +aic7xxx_isr(void *dev_id) { struct aic7xxx_host *p; unsigned char intstat; - p = (struct aic7xxx_host *)dev_id; + p = dev_id; /* * Just a few sanity checks. Make sure that we have an int pending. @@ -6489,7 +6489,7 @@ do_aic7xxx_isr(int irq, void *dev_id) p->flags |= AHC_IN_ISR; do { - aic7xxx_isr(irq, dev_id); + aic7xxx_isr(dev_id); } while ( (aic_inb(p, INTSTAT) & INT_PEND) ); aic7xxx_done_cmds_complete(p); aic7xxx_run_waiting_queues(p); @@ -10377,7 +10377,7 @@ static int __aic7xxx_bus_device_reset(struct scsi_cmnd *cmd) hscb = scb->hscb; - aic7xxx_isr(p->irq, (void *)p); + aic7xxx_isr(p); aic7xxx_done_cmds_complete(p); /* If the command was already complete or just completed, then we didn't * do a reset, return FAILED */ @@ -10608,7 +10608,7 @@ static int __aic7xxx_abort(struct scsi_cmnd *cmd) else return FAILED; - aic7xxx_isr(p->irq, (void *)p); + aic7xxx_isr(p); aic7xxx_done_cmds_complete(p); /* If the command was already complete or just completed, then we didn't * do a reset, return FAILED */ @@ -10863,7 +10863,7 @@ static int aic7xxx_reset(struct scsi_cmnd *cmd) while((aic_inb(p, INTSTAT) & INT_PEND) && !(p->flags & AHC_IN_ISR)) { - aic7xxx_isr(p->irq, p); + aic7xxx_isr(p); pause_sequencer(p); } aic7xxx_done_cmds_complete(p); diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 81e3ee51d897..e95b367d09ed 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -1219,7 +1219,7 @@ static void dump_register_info(struct AdapterCtlBlk *acb, srb, srb->cmd, srb->cmd->pid, srb->cmd->cmnd[0], srb->cmd->device->id, srb->cmd->device->lun); - printk(" sglist=%p cnt=%i idx=%i len=%Zd\n", + printk(" sglist=%p cnt=%i idx=%i len=%zu\n", srb->segment_x, srb->sg_count, srb->sg_index, srb->total_xfer_length); printk(" state=0x%04x status=0x%02x phase=0x%02x (%sconn.)\n", @@ -1815,7 +1815,7 @@ static void dc395x_handle_interrupt(struct AdapterCtlBlk *acb, static irqreturn_t dc395x_interrupt(int irq, void *dev_id) { - struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)dev_id; + struct AdapterCtlBlk *acb = dev_id; u16 scsi_status; u8 dma_status; irqreturn_t handled = IRQ_NONE; diff --git a/drivers/scsi/dtc.c b/drivers/scsi/dtc.c index 0d5713dfa204..54756722dd5f 100644 --- a/drivers/scsi/dtc.c +++ b/drivers/scsi/dtc.c @@ -82,7 +82,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/interrupt.h> -#include <asm/io.h> +#include <linux/io.h> #include "scsi.h" #include <scsi/scsi_host.h> #include "dtc.h" diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c index 41b05fc45380..72794a7b6dcc 100644 --- a/drivers/scsi/fdomain.c +++ b/drivers/scsi/fdomain.c @@ -278,9 +278,9 @@ #include <linux/pci.h> #include <linux/stat.h> #include <linux/delay.h> +#include <linux/io.h> #include <scsi/scsicam.h> -#include <asm/io.h> #include <asm/system.h> #include <scsi/scsi.h> diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index c773e35dace7..1fd3c7590d31 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -466,7 +466,7 @@ static void mesh_start_cmd(struct mesh_state *ms, struct scsi_cmnd *cmd) dlog(ms, "intr b4 arb, intr/exc/err/fc=%.8x", MKWORD(mr->interrupt, mr->exception, mr->error, mr->fifo_count)); - mesh_interrupt(0, (void *)ms, NULL); + mesh_interrupt(0, (void *)ms); if (ms->phase != arbitrating) return; } @@ -504,7 +504,7 @@ static void mesh_start_cmd(struct mesh_state *ms, struct scsi_cmnd *cmd) dlog(ms, "intr after disresel, intr/exc/err/fc=%.8x", MKWORD(mr->interrupt, mr->exception, mr->error, mr->fifo_count)); - mesh_interrupt(0, (void *)ms, NULL); + mesh_interrupt(0, (void *)ms); if (ms->phase != arbitrating) return; dlog(ms, "after intr after disresel, intr/exc/err/fc=%.8x", diff --git a/drivers/scsi/qlogicfas408.c b/drivers/scsi/qlogicfas408.c index 1a7de3bd796f..e0725353c99c 100644 --- a/drivers/scsi/qlogicfas408.c +++ b/drivers/scsi/qlogicfas408.c @@ -405,10 +405,10 @@ static unsigned int ql_pcmd(Scsi_Cmnd * cmd) * Interrupt handler */ -static void ql_ihandl(int irq, void *dev_id) +static void ql_ihandl(void *dev_id) { Scsi_Cmnd *icmd; - struct Scsi_Host *host = (struct Scsi_Host *)dev_id; + struct Scsi_Host *host = dev_id; struct qlogicfas408_priv *priv = get_priv_by_host(host); int qbase = priv->qbase; REG0; @@ -438,7 +438,7 @@ irqreturn_t qlogicfas408_ihandl(int irq, void *dev_id) struct Scsi_Host *host = dev_id; spin_lock_irqsave(host->host_lock, flags); - ql_ihandl(irq, dev_id); + ql_ihandl(dev_id); spin_unlock_irqrestore(host->host_lock, flags); return IRQ_HANDLED; } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 3d355d054612..aff1b0cfd4b2 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -495,7 +495,7 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd, memcpy(scmd->cmnd, cmnd, cmnd_size); if (copy_sense) { - int gfp_mask = GFP_ATOMIC; + gfp_t gfp_mask = GFP_ATOMIC; if (shost->hostt->unchecked_isa_dma) gfp_mask |= __GFP_DMA; diff --git a/drivers/scsi/seagate.c b/drivers/scsi/seagate.c index 8ff1f2866f7b..5ffec2721b28 100644 --- a/drivers/scsi/seagate.c +++ b/drivers/scsi/seagate.c @@ -97,8 +97,8 @@ #include <linux/blkdev.h> #include <linux/stat.h> #include <linux/delay.h> +#include <linux/io.h> -#include <asm/io.h> #include <asm/system.h> #include <asm/uaccess.h> diff --git a/drivers/scsi/t128.c b/drivers/scsi/t128.c index 2df6747cb76f..0b7a70f61e0d 100644 --- a/drivers/scsi/t128.c +++ b/drivers/scsi/t128.c @@ -109,7 +109,7 @@ #include <asm/system.h> #include <linux/signal.h> #include <linux/sched.h> -#include <asm/io.h> +#include <linux/io.h> #include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/stat.h> diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c index 0f0ac925d319..d03aa6ce8fe8 100644 --- a/drivers/scsi/tmscsim.c +++ b/drivers/scsi/tmscsim.c @@ -700,9 +700,9 @@ dc390_InvalidCmd(struct dc390_acb* pACB) static irqreturn_t __inline__ -DC390_Interrupt(int irq, void *dev_id) +DC390_Interrupt(void *dev_id) { - struct dc390_acb *pACB = (struct dc390_acb*)dev_id; + struct dc390_acb *pACB = dev_id; struct dc390_dcb *pDCB; struct dc390_srb *pSRB; u8 sstatus=0; @@ -811,12 +811,12 @@ DC390_Interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t do_DC390_Interrupt( int irq, void *dev_id) +static irqreturn_t do_DC390_Interrupt(int irq, void *dev_id) { irqreturn_t ret; DEBUG1(printk (KERN_INFO "DC390: Irq (%i) caught: ", irq)); /* Locking is done in DC390_Interrupt */ - ret = DC390_Interrupt(irq, dev_id); + ret = DC390_Interrupt(dev_id); DEBUG1(printk (".. IRQ returned\n")); return ret; } diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c index 107f0fc34949..56906aba5ee3 100644 --- a/drivers/scsi/ultrastor.c +++ b/drivers/scsi/ultrastor.c @@ -287,7 +287,7 @@ static const unsigned short ultrastor_ports_14f[] = { }; #endif -static void ultrastor_interrupt(int, void *); +static void ultrastor_interrupt(void *); static irqreturn_t do_ultrastor_interrupt(int, void *); static inline void build_sg_list(struct mscp *, struct scsi_cmnd *SCpnt); @@ -893,7 +893,7 @@ static int ultrastor_abort(struct scsi_cmnd *SCpnt) spin_lock_irqsave(host->host_lock, flags); /* FIXME: Ewww... need to think about passing host around properly */ - ultrastor_interrupt(0, NULL); + ultrastor_interrupt(NULL); spin_unlock_irqrestore(host->host_lock, flags); return SUCCESS; } @@ -1039,7 +1039,7 @@ int ultrastor_biosparam(struct scsi_device *sdev, struct block_device *bdev, return 0; } -static void ultrastor_interrupt(int irq, void *dev_id) +static void ultrastor_interrupt(void *dev_id) { unsigned int status; #if ULTRASTOR_MAX_CMDS > 1 @@ -1177,7 +1177,7 @@ static irqreturn_t do_ultrastor_interrupt(int irq, void *dev_id) struct Scsi_Host *dev = dev_id; spin_lock_irqsave(dev->host_lock, flags); - ultrastor_interrupt(irq, dev_id); + ultrastor_interrupt(dev_id); spin_unlock_irqrestore(dev->host_lock, flags); return IRQ_HANDLED; } diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c index 331e1cf159b0..30be76514c43 100644 --- a/drivers/scsi/wd7000.c +++ b/drivers/scsi/wd7000.c @@ -178,10 +178,10 @@ #include <linux/blkdev.h> #include <linux/init.h> #include <linux/stat.h> +#include <linux/io.h> #include <asm/system.h> #include <asm/dma.h> -#include <asm/io.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> diff --git a/drivers/serial/68360serial.c b/drivers/serial/68360serial.c index 4e56ec803861..634ecca36a77 100644 --- a/drivers/serial/68360serial.c +++ b/drivers/serial/68360serial.c @@ -620,7 +620,7 @@ static void rs_360_interrupt(int vec, void *dev_id) volatile struct smc_regs *smcp; volatile struct scc_regs *sccp; - info = (ser_info_t *)dev_id; + info = dev_id; idx = PORT_NUM(info->state->smc_scc_num); if (info->state->smc_scc_num & NUM_IS_SCC) { diff --git a/drivers/serial/jsm/jsm_neo.c b/drivers/serial/jsm/jsm_neo.c index 8fa31e68989a..8be8da37f629 100644 --- a/drivers/serial/jsm/jsm_neo.c +++ b/drivers/serial/jsm/jsm_neo.c @@ -1116,7 +1116,7 @@ static void neo_param(struct jsm_channel *ch) */ static irqreturn_t neo_intr(int irq, void *voidbrd) { - struct jsm_board *brd = (struct jsm_board *) voidbrd; + struct jsm_board *brd = voidbrd; struct jsm_channel *ch; int port = 0; int type = 0; diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c index c85ac1a77608..7656a35f5e2f 100644 --- a/drivers/serial/m32r_sio.c +++ b/drivers/serial/m32r_sio.c @@ -590,7 +590,7 @@ static void m32r_sio_timeout(unsigned long data) sts = sio_in(up, SIOSTS); if (sts & 0x5) { spin_lock(&up->port.lock); - m32r_sio_handle_port(up, sts, NULL); + m32r_sio_handle_port(up, sts); spin_unlock(&up->port.lock); } diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index 039c2fd6d496..4f80c5b4a753 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -512,19 +512,11 @@ mpc52xx_uart_int_tx_chars(struct uart_port *port) static irqreturn_t mpc52xx_uart_int(int irq, void *dev_id) { - struct uart_port *port = (struct uart_port *) dev_id; + struct uart_port *port = dev_id; unsigned long pass = ISR_PASS_LIMIT; unsigned int keepgoing; unsigned short status; - if ( irq != port->irq ) { - printk( KERN_WARNING - "mpc52xx_uart_int : " \ - "Received wrong int %d. Waiting for %d\n", - irq, port->irq); - return IRQ_NONE; - } - spin_lock(&port->lock); /* While we have stuff to do, we continue */ diff --git a/drivers/serial/mux.c b/drivers/serial/mux.c index aa819d3f8ee5..8ad1b8c5ec5d 100644 --- a/drivers/serial/mux.c +++ b/drivers/serial/mux.c @@ -230,7 +230,7 @@ static void mux_read(struct uart_port *port) continue; } - if (uart_handle_sysrq_char(port, data & 0xffu, NULL)) + if (uart_handle_sysrq_char(port, data & 0xffu)) continue; tty_insert_flip_char(tty, data & 0xFF, TTY_NORMAL); diff --git a/drivers/serial/netx-serial.c b/drivers/serial/netx-serial.c index e92d7e1c22cf..062bad457b1a 100644 --- a/drivers/serial/netx-serial.c +++ b/drivers/serial/netx-serial.c @@ -247,7 +247,7 @@ static void netx_rxint(struct uart_port *port) static irqreturn_t netx_int(int irq, void *dev_id) { - struct uart_port *port = (struct uart_port *)dev_id; + struct uart_port *port = dev_id; unsigned long flags; unsigned char status; diff --git a/drivers/serial/pxa.c b/drivers/serial/pxa.c index 846089f222d4..415fe9633a9b 100644 --- a/drivers/serial/pxa.c +++ b/drivers/serial/pxa.c @@ -232,7 +232,7 @@ static inline void check_modem_status(struct uart_pxa_port *up) */ static inline irqreturn_t serial_pxa_irq(int irq, void *dev_id) { - struct uart_pxa_port *up = (struct uart_pxa_port *)dev_id; + struct uart_pxa_port *up = dev_id; unsigned int iir, lsr; iir = serial_in(up, UART_IIR); diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c index 709f93a6c18c..956b2cf08e1e 100644 --- a/drivers/serial/sn_console.c +++ b/drivers/serial/sn_console.c @@ -674,7 +674,7 @@ static void sn_sal_timer_poll(unsigned long data) if (!port->sc_port.irq) { spin_lock_irqsave(&port->sc_port.lock, flags); if (sn_process_input) - sn_receive_chars(port, NULL, flags); + sn_receive_chars(port, flags); sn_transmit_chars(port, TRANSMIT_RAW); spin_unlock_irqrestore(&port->sc_port.lock, flags); mod_timer(&port->sc_timer, diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index b11f6dea2704..b2cc703b2b9e 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -1057,7 +1057,7 @@ static void sunzilog_free_tables(void) static void sunzilog_putchar(struct uart_port *port, int ch) { - struct zilog_channel *channel = ZILOG_CHANNEL_FROM_PORT(port); + struct zilog_channel __iomem *channel = ZILOG_CHANNEL_FROM_PORT(port); int loops = ZS_PUT_CHAR_MAX_DELAY; /* This is a timed polling loop so do not switch the explicit @@ -1182,7 +1182,7 @@ static int __init sunzilog_console_setup(struct console *con, char *options) return 0; } -static struct console sunzilog_console = { +static struct console sunzilog_console_ops = { .name = "ttyS", .write = sunzilog_console_write, .device = uart_console_device, @@ -1208,10 +1208,10 @@ static inline struct console *SUNZILOG_CONSOLE(void) if (i == NUM_CHANNELS) return NULL; - sunzilog_console.index = i; + sunzilog_console_ops.index = i; sunzilog_port_table[i].flags |= SUNZILOG_FLAG_IS_CONS; - return &sunzilog_console; + return &sunzilog_console_ops; } #else diff --git a/drivers/sn/ioc3.c b/drivers/sn/ioc3.c index 3d91b6b9287d..cd6b65333b71 100644 --- a/drivers/sn/ioc3.c +++ b/drivers/sn/ioc3.c @@ -401,7 +401,7 @@ static inline uint32_t get_pending_intrs(struct ioc3_driver_data *idd) static irqreturn_t ioc3_intr_io(int irq, void *arg) { unsigned long flags; - struct ioc3_driver_data *idd = (struct ioc3_driver_data *)arg; + struct ioc3_driver_data *idd = arg; int handled = 1, id; unsigned int pending; diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 77122edeb206..72025df5561d 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -669,7 +669,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) static irqreturn_t ssp_int(int irq, void *dev_id) { - struct driver_data *drv_data = (struct driver_data *)dev_id; + struct driver_data *drv_data = dev_id; void *reg = drv_data->ioaddr; if (!drv_data->cur_msg) { diff --git a/drivers/tc/zs.c b/drivers/tc/zs.c index 7c0fe1dc96a9..792becdfe6f8 100644 --- a/drivers/tc/zs.c +++ b/drivers/tc/zs.c @@ -389,7 +389,7 @@ static void receive_chars(struct dec_serial *info) if (ch == 0) continue; if (time_before(jiffies, break_pressed + HZ * 5)) { - handle_sysrq(ch, regs, NULL); + handle_sysrq(ch, NULL); break_pressed = 0; continue; } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 2c9c9462d899..724822cac2b1 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1216,7 +1216,7 @@ static int proc_submiturb_compat(struct dev_state *ps, void __user *arg) { struct usbdevfs_urb uurb; - if (get_urb32(&uurb,(struct usbdevfs_urb32 *)arg)) + if (get_urb32(&uurb,(struct usbdevfs_urb32 __user *)arg)) return -EFAULT; return proc_do_submiturb(ps, &uurb, ((struct usbdevfs_urb32 __user *)arg)->iso_frame_desc, arg); @@ -1251,7 +1251,7 @@ static int processcompl_compat(struct async *as, void __user * __user *arg) } free_async(as); - if (put_user((u32)(u64)addr, (u32 __user *)arg)) + if (put_user(ptr_to_compat(addr), (u32 __user *)arg)) return -EFAULT; return 0; } @@ -1520,7 +1520,7 @@ static int usbdev_ioctl(struct inode *inode, struct file *file, unsigned int cmd case USBDEVFS_IOCTL32: snoop(&dev->dev, "%s: IOCTL\n", __FUNCTION__); - ret = proc_ioctl_compat(ps, (compat_uptr_t)(long)p); + ret = proc_ioctl_compat(ps, ptr_to_compat(p)); break; #endif diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c index 4d2946e540cf..f1f32d7be5f9 100644 --- a/drivers/usb/gadget/dummy_hcd.c +++ b/drivers/usb/gadget/dummy_hcd.c @@ -1551,7 +1551,7 @@ return_urb: ep->already_seen = ep->setup_stage = 0; spin_unlock (&dum->lock); - usb_hcd_giveback_urb (dummy_to_hcd(dum), urb, NULL); + usb_hcd_giveback_urb (dummy_to_hcd(dum), urb); spin_lock (&dum->lock); goto restart; diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index e08d1a2664e6..fe1fe2f97cb5 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -925,7 +925,7 @@ rescan_all: /* only take off EDs that the HC isn't using, accounting for * frame counter wraps and EDs with partially retired TDs */ - if (likely (get_irq_regs() && HC_IS_RUNNING(ohci_to_hcd(ohci)->state))) { + if (likely (HC_IS_RUNNING(ohci_to_hcd(ohci)->state))) { if (tick_before (tick, ed->tick)) { skip_ed: last = &ed->ed_next; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 2306d493e55b..021be39fe16e 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1087,7 +1087,7 @@ static int mos7840_open(struct usb_serial_port *port, struct file *filp) mos7840_port->icount.tx = 0; mos7840_port->icount.rx = 0; - dbg("\n\nusb_serial serial:%x mos7840_port:%x\n usb_serial_port port:%x\n\n", (unsigned int)serial, (unsigned int)mos7840_port, (unsigned int)port); + dbg("\n\nusb_serial serial:%p mos7840_port:%p\n usb_serial_port port:%p\n\n", serial, mos7840_port, port); return 0; @@ -1420,7 +1420,6 @@ static int mos7840_write(struct usb_serial_port *port, int i; int bytes_sent = 0; int transfer_size; - int from_user = 0; struct moschip_port *mos7840_port; struct usb_serial *serial; @@ -1511,15 +1510,7 @@ static int mos7840_write(struct usb_serial_port *port, } transfer_size = min(count, URB_TRANSFER_BUFFER_SIZE); - if (from_user) { - if (copy_from_user - (urb->transfer_buffer, current_position, transfer_size)) { - bytes_sent = -EFAULT; - goto exit; - } - } else { - memcpy(urb->transfer_buffer, current_position, transfer_size); - } + memcpy(urb->transfer_buffer, current_position, transfer_size); /* fill urb with data and submit */ usb_fill_bulk_urb(urb, @@ -2225,7 +2216,7 @@ static void mos7840_set_termios(struct usb_serial_port *port, *****************************************************************************/ static int mos7840_get_lsr_info(struct moschip_port *mos7840_port, - unsigned int *value) + unsigned int __user *value) { int count; unsigned int result = 0; @@ -2248,7 +2239,7 @@ static int mos7840_get_lsr_info(struct moschip_port *mos7840_port, *****************************************************************************/ static int mos7840_get_bytes_avail(struct moschip_port *mos7840_port, - unsigned int *value) + unsigned int __user *value) { unsigned int result = 0; struct tty_struct *tty = mos7840_port->port->tty; @@ -2271,7 +2262,7 @@ static int mos7840_get_bytes_avail(struct moschip_port *mos7840_port, *****************************************************************************/ static int mos7840_set_modem_info(struct moschip_port *mos7840_port, - unsigned int cmd, unsigned int *value) + unsigned int cmd, unsigned int __user *value) { unsigned int mcr; unsigned int arg; @@ -2341,7 +2332,7 @@ static int mos7840_set_modem_info(struct moschip_port *mos7840_port, *****************************************************************************/ static int mos7840_get_modem_info(struct moschip_port *mos7840_port, - unsigned int *value) + unsigned int __user *value) { unsigned int result = 0; __u16 msr; @@ -2370,7 +2361,7 @@ static int mos7840_get_modem_info(struct moschip_port *mos7840_port, *****************************************************************************/ static int mos7840_get_serial_info(struct moschip_port *mos7840_port, - struct serial_struct *retinfo) + struct serial_struct __user *retinfo) { struct serial_struct tmp; @@ -2405,6 +2396,7 @@ static int mos7840_get_serial_info(struct moschip_port *mos7840_port, static int mos7840_ioctl(struct usb_serial_port *port, struct file *file, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; struct moschip_port *mos7840_port; struct tty_struct *tty; @@ -2433,16 +2425,13 @@ static int mos7840_ioctl(struct usb_serial_port *port, struct file *file, case TIOCINQ: dbg("%s (%d) TIOCINQ", __FUNCTION__, port->number); - return mos7840_get_bytes_avail(mos7840_port, - (unsigned int *)arg); - break; + return mos7840_get_bytes_avail(mos7840_port, argp); case TIOCOUTQ: dbg("%s (%d) TIOCOUTQ", __FUNCTION__, port->number); return put_user(tty->driver->chars_in_buffer ? tty->driver->chars_in_buffer(tty) : 0, (int __user *)arg); - break; case TCFLSH: retval = tty_check_change(tty); @@ -2472,13 +2461,13 @@ static int mos7840_ioctl(struct usb_serial_port *port, struct file *file, case TCGETS: if (kernel_termios_to_user_termios - ((struct termios __user *)arg, tty->termios)) + ((struct termios __user *)argp, tty->termios)) return -EFAULT; return 0; case TIOCSERGETLSR: dbg("%s (%d) TIOCSERGETLSR", __FUNCTION__, port->number); - return mos7840_get_lsr_info(mos7840_port, (unsigned int *)arg); + return mos7840_get_lsr_info(mos7840_port, argp); return 0; case TIOCMBIS: @@ -2487,19 +2476,16 @@ static int mos7840_ioctl(struct usb_serial_port *port, struct file *file, dbg("%s (%d) TIOCMSET/TIOCMBIC/TIOCMSET", __FUNCTION__, port->number); mosret = - mos7840_set_modem_info(mos7840_port, cmd, - (unsigned int *)arg); + mos7840_set_modem_info(mos7840_port, cmd, argp); return mosret; case TIOCMGET: dbg("%s (%d) TIOCMGET", __FUNCTION__, port->number); - return mos7840_get_modem_info(mos7840_port, - (unsigned int *)arg); + return mos7840_get_modem_info(mos7840_port, argp); case TIOCGSERIAL: dbg("%s (%d) TIOCGSERIAL", __FUNCTION__, port->number); - return mos7840_get_serial_info(mos7840_port, - (struct serial_struct *)arg); + return mos7840_get_serial_info(mos7840_port, argp); case TIOCSSERIAL: dbg("%s (%d) TIOCSSERIAL", __FUNCTION__, port->number); @@ -2549,7 +2535,7 @@ static int mos7840_ioctl(struct usb_serial_port *port, struct file *file, dbg("%s (%d) TIOCGICOUNT RX=%d, TX=%d", __FUNCTION__, port->number, icount.rx, icount.tx); - if (copy_to_user((void *)arg, &icount, sizeof(icount))) + if (copy_to_user(argp, &icount, sizeof(icount))) return -EFAULT; return 0; @@ -2817,7 +2803,7 @@ static int mos7840_startup(struct usb_serial *serial) /* setting configuration feature to one */ usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), - (__u8) 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 5 * HZ); + (__u8) 0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5 * HZ); return 0; } diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index daaa486159cf..7a43020fa583 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -701,7 +701,6 @@ config FB_NVIDIA depends on FB && PCI select I2C_ALGOBIT if FB_NVIDIA_I2C select I2C if FB_NVIDIA_I2C - select FB_DDC if FB_NVIDIA_I2C select FB_MODE_HELPERS select FB_CFB_FILLRECT select FB_CFB_COPYAREA diff --git a/drivers/video/nvidia/nv_i2c.c b/drivers/video/nvidia/nv_i2c.c index e48de3c9fd13..19eef3a09023 100644 --- a/drivers/video/nvidia/nv_i2c.c +++ b/drivers/video/nvidia/nv_i2c.c @@ -160,12 +160,51 @@ void nvidia_delete_i2c_busses(struct nvidia_par *par) } +static u8 *nvidia_do_probe_i2c_edid(struct nvidia_i2c_chan *chan) +{ + u8 start = 0x0; + struct i2c_msg msgs[] = { + { + .addr = 0x50, + .len = 1, + .buf = &start, + }, { + .addr = 0x50, + .flags = I2C_M_RD, + .len = EDID_LENGTH, + }, + }; + u8 *buf; + + if (!chan->par) + return NULL; + + buf = kmalloc(EDID_LENGTH, GFP_KERNEL); + if (!buf) { + dev_warn(&chan->par->pci_dev->dev, "Out of memory!\n"); + return NULL; + } + msgs[1].buf = buf; + + if (i2c_transfer(&chan->adapter, msgs, 2) == 2) + return buf; + dev_dbg(&chan->par->pci_dev->dev, "Unable to read EDID block.\n"); + kfree(buf); + return NULL; +} + int nvidia_probe_i2c_connector(struct fb_info *info, int conn, u8 **out_edid) { struct nvidia_par *par = info->par; - u8 *edid; - - edid = fb_ddc_read(&par->chan[conn - 1].adapter); + u8 *edid = NULL; + int i; + + for (i = 0; i < 3; i++) { + /* Do the real work */ + edid = nvidia_do_probe_i2c_edid(&par->chan[conn - 1]); + if (edid) + break; + } if (!edid && conn == 1) { /* try to get from firmware */ diff --git a/fs/Kconfig b/fs/Kconfig index 599de54451af..db4d13324c36 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -140,6 +140,73 @@ config EXT3_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. +config EXT4DEV_FS + tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" + depends on EXPERIMENTAL + select JBD2 + help + Ext4dev is a predecessor filesystem of the next generation + extended fs ext4, based on ext3 filesystem code. It will be + renamed ext4 fs later, once ext4dev is mature and stabilized. + + Unlike the change from ext2 filesystem to ext3 filesystem, + the on-disk format of ext4dev is not the same as ext3 any more: + it is based on extent maps and it supports 48-bit physical block + numbers. These combined on-disk format changes will allow + ext4dev/ext4 to handle more than 16 TB filesystem volumes -- + a hard limit that ext3 cannot overcome without changing the + on-disk format. + + Other than extent maps and 48-bit block numbers, ext4dev also is + likely to have other new features such as persistent preallocation, + high resolution time stamps, and larger file support etc. These + features will be added to ext4dev gradually. + + To compile this file system support as a module, choose M here. The + module will be called ext4dev. Be aware, however, that the filesystem + of your root partition (the one containing the directory /) cannot + be compiled as a module, and so this could be dangerous. + + If unsure, say N. + +config EXT4DEV_FS_XATTR + bool "Ext4dev extended attributes" + depends on EXT4DEV_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + <http://acl.bestbits.at/> for details). + + If unsure, say N. + + You need this for POSIX ACL support on ext4dev/ext4. + +config EXT4DEV_FS_POSIX_ACL + bool "Ext4dev POSIX Access Control Lists" + depends on EXT4DEV_FS_XATTR + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N + +config EXT4DEV_FS_SECURITY + bool "Ext4dev Security Labels" + depends on EXT4DEV_FS_XATTR + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the ext4dev/ext4 filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. + config JBD tristate help @@ -172,12 +239,44 @@ config JBD_DEBUG generated. To turn debugging off again, do "echo 0 > /proc/sys/fs/jbd-debug". +config JBD2 + tristate + help + This is a generic journaling layer for block devices that support + both 32-bit and 64-bit block numbers. It is currently used by + the ext4dev/ext4 filesystem, but it could also be used to add + journal support to other file systems or block devices such + as RAID or LVM. + + If you are using ext4dev/ext4, you need to say Y here. If you are not + using ext4dev/ext4 then you will probably want to say N. + + To compile this device as a module, choose M here. The module will be + called jbd2. If you are compiling ext4dev/ext4 into the kernel, + you cannot compile this code as a module. + +config JBD2_DEBUG + bool "JBD2 (ext4dev/ext4) debugging support" + depends on JBD2 + help + If you are using the ext4dev/ext4 journaled file system (or + potentially any other filesystem/device using JBD2), this option + allows you to enable debugging output while the system is running, + in order to help track down any problems you are having. + By default, the debugging output will be turned off. + + If you select Y here, then you will be able to turn on debugging + with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between + 1 and 5. The higher the number, the more debugging output is + generated. To turn debugging off again, do + "echo 0 > /proc/sys/fs/jbd2-debug". + config FS_MBCACHE -# Meta block cache for Extended Attributes (ext2/ext3) +# Meta block cache for Extended Attributes (ext2/ext3/ext4) tristate - depends on EXT2_FS_XATTR || EXT3_FS_XATTR - default y if EXT2_FS=y || EXT3_FS=y - default m if EXT2_FS=m || EXT3_FS=m + depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR + default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y + default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m config REISERFS_FS tristate "Reiserfs support" diff --git a/fs/Makefile b/fs/Makefile index df614eacee86..9a5ce9323bfd 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -62,7 +62,9 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 +obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev obj-$(CONFIG_JBD) += jbd/ +obj-$(CONFIG_JBD2) += jbd2/ obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_CRAMFS) += cramfs/ obj-$(CONFIG_RAMFS) += ramfs/ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index cf8a2cb28505..a6ec75c56fcf 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -211,8 +211,8 @@ static int afs_dir_open(struct inode *inode, struct file *file) { _enter("{%lu}", inode->i_ino); - BUG_ON(sizeof(union afs_dir_block) != 2048); - BUG_ON(sizeof(union afs_dirent) != 32); + BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_dirent) != 32); if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) return -ENOENT; @@ -446,8 +446,8 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, _enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name); /* insanity checks first */ - BUG_ON(sizeof(union afs_dir_block) != 2048); - BUG_ON(sizeof(union afs_dirent) != 32); + BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_dirent) != 32); if (dentry->d_name.len > 255) { _leave(" = -ENAMETOOLONG"); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 480ab178cba5..b13f32c8aeee 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -94,7 +94,6 @@ struct autofs_wait_queue { struct autofs_sb_info { u32 magic; - struct dentry *root; int pipefd; struct file *pipe; pid_t oz_pgrp; @@ -229,4 +228,4 @@ out: } void autofs4_dentry_release(struct dentry *); - +extern void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 5d9193332bef..723a1c5e361b 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -24,7 +24,7 @@ static struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", .get_sb = autofs_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = autofs4_kill_sb, }; static int __init init_autofs4_fs(void) diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 800ce876caec..51fd8595bf85 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -96,7 +96,7 @@ void autofs4_free_ino(struct autofs_info *ino) */ static void autofs4_force_release(struct autofs_sb_info *sbi) { - struct dentry *this_parent = sbi->root; + struct dentry *this_parent = sbi->sb->s_root; struct list_head *next; spin_lock(&dcache_lock); @@ -127,7 +127,7 @@ resume: spin_lock(&dcache_lock); } - if (this_parent != sbi->root) { + if (this_parent != sbi->sb->s_root) { struct dentry *dentry = this_parent; next = this_parent->d_u.d_child.next; @@ -140,15 +140,9 @@ resume: goto resume; } spin_unlock(&dcache_lock); - - dput(sbi->root); - sbi->root = NULL; - shrink_dcache_sb(sbi->sb); - - return; } -static void autofs4_put_super(struct super_block *sb) +void autofs4_kill_sb(struct super_block *sb) { struct autofs_sb_info *sbi = autofs4_sbi(sb); @@ -163,6 +157,7 @@ static void autofs4_put_super(struct super_block *sb) kfree(sbi); DPRINTK("shutting down"); + kill_anon_super(sb); } static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) @@ -189,7 +184,6 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) } static struct super_operations autofs4_sops = { - .put_super = autofs4_put_super, .statfs = simple_statfs, .show_options = autofs4_show_options, }; @@ -315,7 +309,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; - sbi->root = NULL; sbi->pipefd = -1; sbi->catatonic = 0; sbi->exp_timeout = 0; @@ -397,13 +390,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->pipefd = pipefd; /* - * Take a reference to the root dentry so we get a chance to - * clean up the dentry tree on umount. - * See autofs4_force_release. - */ - sbi->root = dget(root); - - /* * Success! Install the root dentry now to indicate completion. */ s->s_root = root; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index ce103e7b0bc3..c0a6c8d445c7 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; } - shrink_dcache_sb(sbi->sb); } static int autofs4_write(struct file *file, const void *addr, int bytes) diff --git a/fs/befs/befs.h b/fs/befs/befs.h index 057a2c3d73b7..d9a40abda6b7 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h @@ -94,7 +94,7 @@ void befs_debug(const struct super_block *sb, const char *fmt, ...); void befs_dump_super_block(const struct super_block *sb, befs_super_block *); void befs_dump_inode(const struct super_block *sb, befs_inode *); -void befs_dump_index_entry(const struct super_block *sb, befs_btree_super *); +void befs_dump_index_entry(const struct super_block *sb, befs_disk_btree_super *); void befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *); /****************************/ @@ -136,7 +136,7 @@ blockno2iaddr(struct super_block *sb, befs_blocknr_t blockno) static inline unsigned int befs_iaddrs_per_block(struct super_block *sb) { - return BEFS_SB(sb)->block_size / sizeof (befs_inode_addr); + return BEFS_SB(sb)->block_size / sizeof (befs_disk_inode_addr); } static inline int @@ -151,4 +151,6 @@ befs_brun_size(struct super_block *sb, befs_block_run run) return BEFS_SB(sb)->block_size * run.len; } +#include "endian.h" + #endif /* _LINUX_BEFS_H */ diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h index 63ef1e18fb84..e2595c2c403a 100644 --- a/fs/befs/befs_fs_types.h +++ b/fs/befs/befs_fs_types.h @@ -79,17 +79,27 @@ enum inode_flags { * On-Disk datastructures of BeFS */ +typedef u64 __bitwise fs64; +typedef u32 __bitwise fs32; +typedef u16 __bitwise fs16; + typedef u64 befs_off_t; -typedef u64 befs_time_t; -typedef void befs_binode_etc; +typedef fs64 befs_time_t; /* Block runs */ typedef struct { + fs32 allocation_group; + fs16 start; + fs16 len; +} PACKED befs_disk_block_run; + +typedef struct { u32 allocation_group; u16 start; u16 len; } PACKED befs_block_run; +typedef befs_disk_block_run befs_disk_inode_addr; typedef befs_block_run befs_inode_addr; /* @@ -97,31 +107,31 @@ typedef befs_block_run befs_inode_addr; */ typedef struct { char name[B_OS_NAME_LENGTH]; - u32 magic1; - u32 fs_byte_order; + fs32 magic1; + fs32 fs_byte_order; - u32 block_size; - u32 block_shift; + fs32 block_size; + fs32 block_shift; - befs_off_t num_blocks; - befs_off_t used_blocks; + fs64 num_blocks; + fs64 used_blocks; - u32 inode_size; + fs32 inode_size; - u32 magic2; - u32 blocks_per_ag; - u32 ag_shift; - u32 num_ags; + fs32 magic2; + fs32 blocks_per_ag; + fs32 ag_shift; + fs32 num_ags; - u32 flags; + fs32 flags; - befs_block_run log_blocks; - befs_off_t log_start; - befs_off_t log_end; + befs_disk_block_run log_blocks; + fs64 log_start; + fs64 log_end; - u32 magic3; - befs_inode_addr root_dir; - befs_inode_addr indices; + fs32 magic3; + befs_disk_inode_addr root_dir; + befs_disk_inode_addr indices; } PACKED befs_super_block; @@ -130,6 +140,16 @@ typedef struct { * be longer than one block! */ typedef struct { + befs_disk_block_run direct[BEFS_NUM_DIRECT_BLOCKS]; + fs64 max_direct_range; + befs_disk_block_run indirect; + fs64 max_indirect_range; + befs_disk_block_run double_indirect; + fs64 max_double_indirect_range; + fs64 size; +} PACKED befs_disk_data_stream; + +typedef struct { befs_block_run direct[BEFS_NUM_DIRECT_BLOCKS]; befs_off_t max_direct_range; befs_block_run indirect; @@ -141,35 +161,35 @@ typedef struct { /* Attribute */ typedef struct { - u32 type; - u16 name_size; - u16 data_size; + fs32 type; + fs16 name_size; + fs16 data_size; char name[1]; } PACKED befs_small_data; /* Inode structure */ typedef struct { - u32 magic1; - befs_inode_addr inode_num; - u32 uid; - u32 gid; - u32 mode; - u32 flags; + fs32 magic1; + befs_disk_inode_addr inode_num; + fs32 uid; + fs32 gid; + fs32 mode; + fs32 flags; befs_time_t create_time; befs_time_t last_modified_time; - befs_inode_addr parent; - befs_inode_addr attributes; - u32 type; + befs_disk_inode_addr parent; + befs_disk_inode_addr attributes; + fs32 type; - u32 inode_size; - u32 etc; /* not use */ + fs32 inode_size; + fs32 etc; /* not use */ union { - befs_data_stream datastream; + befs_disk_data_stream datastream; char symlink[BEFS_SYMLINK_LEN]; } data; - u32 pad[4]; /* not use */ + fs32 pad[4]; /* not use */ befs_small_data small_data[1]; } PACKED befs_inode; @@ -190,6 +210,16 @@ enum btree_types { }; typedef struct { + fs32 magic; + fs32 node_size; + fs32 max_depth; + fs32 data_type; + fs64 root_node_ptr; + fs64 free_node_ptr; + fs64 max_size; +} PACKED befs_disk_btree_super; + +typedef struct { u32 magic; u32 node_size; u32 max_depth; @@ -203,11 +233,19 @@ typedef struct { * Header stucture of each btree node */ typedef struct { + fs64 left; + fs64 right; + fs64 overflow; + fs16 all_key_count; + fs16 all_key_length; +} PACKED befs_btree_nodehead; + +typedef struct { befs_off_t left; befs_off_t right; befs_off_t overflow; u16 all_key_count; u16 all_key_length; -} PACKED befs_btree_nodehead; +} PACKED befs_host_btree_nodehead; #endif /* _LINUX_BEFS_FS_TYPES */ diff --git a/fs/befs/btree.c b/fs/befs/btree.c index 76e219799409..81b042ee24e6 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c @@ -30,7 +30,6 @@ #include "befs.h" #include "btree.h" #include "datastream.h" -#include "endian.h" /* * The btree functions in this file are built on top of the @@ -80,7 +79,7 @@ * In memory structure of each btree node */ typedef struct { - befs_btree_nodehead head; /* head of node converted to cpu byteorder */ + befs_host_btree_nodehead head; /* head of node converted to cpu byteorder */ struct buffer_head *bh; befs_btree_nodehead *od_node; /* on disk node */ } befs_btree_node; @@ -102,9 +101,9 @@ static int befs_bt_read_node(struct super_block *sb, befs_data_stream * ds, static int befs_leafnode(befs_btree_node * node); -static u16 *befs_bt_keylen_index(befs_btree_node * node); +static fs16 *befs_bt_keylen_index(befs_btree_node * node); -static befs_off_t *befs_bt_valarray(befs_btree_node * node); +static fs64 *befs_bt_valarray(befs_btree_node * node); static char *befs_bt_keydata(befs_btree_node * node); @@ -136,7 +135,7 @@ befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, befs_btree_super * sup) { struct buffer_head *bh = NULL; - befs_btree_super *od_sup = NULL; + befs_disk_btree_super *od_sup = NULL; befs_debug(sb, "---> befs_btree_read_super()"); @@ -146,7 +145,7 @@ befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, befs_error(sb, "Couldn't read index header."); goto error; } - od_sup = (befs_btree_super *) bh->b_data; + od_sup = (befs_disk_btree_super *) bh->b_data; befs_dump_index_entry(sb, od_sup); sup->magic = fs32_to_cpu(sb, od_sup->magic); @@ -342,7 +341,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node, u16 keylen; int findkey_len; char *thiskey; - befs_off_t *valarray; + fs64 *valarray; befs_debug(sb, "---> befs_find_key() %s", findkey); @@ -422,7 +421,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, befs_btree_super bt_super; befs_off_t node_off = 0; int cur_key; - befs_off_t *valarray; + fs64 *valarray; char *keystart; u16 keylen; int res; @@ -572,7 +571,7 @@ befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds, this_node->head.overflow); *node_off = this_node->head.overflow; } else { - befs_off_t *valarray = befs_bt_valarray(this_node); + fs64 *valarray = befs_bt_valarray(this_node); *node_off = fs64_to_cpu(sb, valarray[0]); } if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) { @@ -622,7 +621,7 @@ befs_leafnode(befs_btree_node * node) * * Except that rounding up to 8 works, and rounding up to 4 doesn't. */ -static u16 * +static fs16 * befs_bt_keylen_index(befs_btree_node * node) { const int keylen_align = 8; @@ -633,7 +632,7 @@ befs_bt_keylen_index(befs_btree_node * node) if (tmp) off += keylen_align - tmp; - return (u16 *) ((void *) node->od_node + off); + return (fs16 *) ((void *) node->od_node + off); } /** @@ -643,13 +642,13 @@ befs_bt_keylen_index(befs_btree_node * node) * Returns a pointer to the start of the value array * of the node pointed to by the node header */ -static befs_off_t * +static fs64 * befs_bt_valarray(befs_btree_node * node) { void *keylen_index_start = (void *) befs_bt_keylen_index(node); - size_t keylen_index_size = node->head.all_key_count * sizeof (u16); + size_t keylen_index_size = node->head.all_key_count * sizeof (fs16); - return (befs_off_t *) (keylen_index_start + keylen_index_size); + return (fs64 *) (keylen_index_start + keylen_index_size); } /** @@ -681,7 +680,7 @@ befs_bt_get_key(struct super_block *sb, befs_btree_node * node, { int prev_key_end; char *keystart; - u16 *keylen_index; + fs16 *keylen_index; if (index < 0 || index > node->head.all_key_count) { *keylen = 0; diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index b7d6b920f65f..aacb4da6298a 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -18,7 +18,6 @@ #include "befs.h" #include "datastream.h" #include "io.h" -#include "endian.h" const befs_inode_addr BAD_IADDR = { 0, 0, 0 }; @@ -312,7 +311,7 @@ befs_find_brun_indirect(struct super_block *sb, befs_blocknr_t indir_start_blk; befs_blocknr_t search_blk; struct buffer_head *indirblock; - befs_block_run *array; + befs_disk_block_run *array; befs_block_run indirect = data->indirect; befs_blocknr_t indirblockno = iaddr2blockno(sb, &indirect); @@ -334,7 +333,7 @@ befs_find_brun_indirect(struct super_block *sb, return BEFS_ERR; } - array = (befs_block_run *) indirblock->b_data; + array = (befs_disk_block_run *) indirblock->b_data; for (j = 0; j < arraylen; ++j) { int len = fs16_to_cpu(sb, array[j].len); @@ -427,7 +426,7 @@ befs_find_brun_dblindirect(struct super_block *sb, struct buffer_head *dbl_indir_block; struct buffer_head *indir_block; befs_block_run indir_run; - befs_inode_addr *iaddr_array = NULL; + befs_disk_inode_addr *iaddr_array = NULL; befs_sb_info *befs_sb = BEFS_SB(sb); befs_blocknr_t indir_start_blk = @@ -482,7 +481,7 @@ befs_find_brun_dblindirect(struct super_block *sb, dbl_block_indx = dblindir_indx - (dbl_which_block * befs_iaddrs_per_block(sb)); - iaddr_array = (befs_inode_addr *) dbl_indir_block->b_data; + iaddr_array = (befs_disk_inode_addr *) dbl_indir_block->b_data; indir_run = fsrun_to_cpu(sb, iaddr_array[dbl_block_indx]); brelse(dbl_indir_block); iaddr_array = NULL; @@ -507,7 +506,7 @@ befs_find_brun_dblindirect(struct super_block *sb, } block_indx = indir_indx - (which_block * befs_iaddrs_per_block(sb)); - iaddr_array = (befs_inode_addr *) indir_block->b_data; + iaddr_array = (befs_disk_inode_addr *) indir_block->b_data; *run = fsrun_to_cpu(sb, iaddr_array[block_indx]); brelse(indir_block); iaddr_array = NULL; diff --git a/fs/befs/debug.c b/fs/befs/debug.c index 875cc0aa318c..e831a8f30849 100644 --- a/fs/befs/debug.c +++ b/fs/befs/debug.c @@ -21,7 +21,6 @@ #endif /* __KERNEL__ */ #include "befs.h" -#include "endian.h" #define ERRBUFSIZE 1024 @@ -125,7 +124,7 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode) befs_debug(sb, " type %08x", fs32_to_cpu(sb, inode->type)); befs_debug(sb, " inode_size %u", fs32_to_cpu(sb, inode->inode_size)); - if (S_ISLNK(inode->mode)) { + if (S_ISLNK(fs32_to_cpu(sb, inode->mode))) { befs_debug(sb, " Symbolic link [%s]", inode->data.symlink); } else { int i; @@ -231,21 +230,20 @@ befs_dump_small_data(const struct super_block *sb, befs_small_data * sd) /* unused */ void -befs_dump_run(const struct super_block *sb, befs_block_run run) +befs_dump_run(const struct super_block *sb, befs_disk_block_run run) { #ifdef CONFIG_BEFS_DEBUG - run = fsrun_to_cpu(sb, run); + befs_block_run n = fsrun_to_cpu(sb, run); - befs_debug(sb, "[%u, %hu, %hu]", - run.allocation_group, run.start, run.len); + befs_debug(sb, "[%u, %hu, %hu]", n.allocation_group, n.start, n.len); #endif //CONFIG_BEFS_DEBUG } #endif /* 0 */ void -befs_dump_index_entry(const struct super_block *sb, befs_btree_super * super) +befs_dump_index_entry(const struct super_block *sb, befs_disk_btree_super * super) { #ifdef CONFIG_BEFS_DEBUG diff --git a/fs/befs/endian.h b/fs/befs/endian.h index 9ecaea4e3325..e254a20869f4 100644 --- a/fs/befs/endian.h +++ b/fs/befs/endian.h @@ -10,85 +10,84 @@ #define LINUX_BEFS_ENDIAN #include <linux/byteorder/generic.h> -#include "befs.h" static inline u64 -fs64_to_cpu(const struct super_block *sb, u64 n) +fs64_to_cpu(const struct super_block *sb, fs64 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) - return le64_to_cpu(n); + return le64_to_cpu((__force __le64)n); else - return be64_to_cpu(n); + return be64_to_cpu((__force __be64)n); } -static inline u64 +static inline fs64 cpu_to_fs64(const struct super_block *sb, u64 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) - return cpu_to_le64(n); + return (__force fs64)cpu_to_le64(n); else - return cpu_to_be64(n); + return (__force fs64)cpu_to_be64(n); } static inline u32 -fs32_to_cpu(const struct super_block *sb, u32 n) +fs32_to_cpu(const struct super_block *sb, fs32 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) - return le32_to_cpu(n); + return le32_to_cpu((__force __le32)n); else - return be32_to_cpu(n); + return be32_to_cpu((__force __be32)n); } -static inline u32 +static inline fs32 cpu_to_fs32(const struct super_block *sb, u32 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) - return cpu_to_le32(n); + return (__force fs32)cpu_to_le32(n); else - return cpu_to_be32(n); + return (__force fs32)cpu_to_be32(n); } static inline u16 -fs16_to_cpu(const struct super_block *sb, u16 n) +fs16_to_cpu(const struct super_block *sb, fs16 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) - return le16_to_cpu(n); + return le16_to_cpu((__force __le16)n); else - return be16_to_cpu(n); + return be16_to_cpu((__force __be16)n); } -static inline u16 +static inline fs16 cpu_to_fs16(const struct super_block *sb, u16 n) { if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) - return cpu_to_le16(n); + return (__force fs16)cpu_to_le16(n); else - return cpu_to_be16(n); + return (__force fs16)cpu_to_be16(n); } /* Composite types below here */ static inline befs_block_run -fsrun_to_cpu(const struct super_block *sb, befs_block_run n) +fsrun_to_cpu(const struct super_block *sb, befs_disk_block_run n) { befs_block_run run; if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) { - run.allocation_group = le32_to_cpu(n.allocation_group); - run.start = le16_to_cpu(n.start); - run.len = le16_to_cpu(n.len); + run.allocation_group = le32_to_cpu((__force __le32)n.allocation_group); + run.start = le16_to_cpu((__force __le16)n.start); + run.len = le16_to_cpu((__force __le16)n.len); } else { - run.allocation_group = be32_to_cpu(n.allocation_group); - run.start = be16_to_cpu(n.start); - run.len = be16_to_cpu(n.len); + run.allocation_group = be32_to_cpu((__force __be32)n.allocation_group); + run.start = be16_to_cpu((__force __be16)n.start); + run.len = be16_to_cpu((__force __be16)n.len); } return run; } -static inline befs_block_run +static inline befs_disk_block_run cpu_to_fsrun(const struct super_block *sb, befs_block_run n) { - befs_block_run run; + befs_disk_block_run run; if (BEFS_SB(sb)->byte_order == BEFS_BYTESEX_LE) { run.allocation_group = cpu_to_le32(n.allocation_group); @@ -103,7 +102,7 @@ cpu_to_fsrun(const struct super_block *sb, befs_block_run n) } static inline befs_data_stream -fsds_to_cpu(const struct super_block *sb, befs_data_stream n) +fsds_to_cpu(const struct super_block *sb, befs_disk_data_stream n) { befs_data_stream data; int i; diff --git a/fs/befs/inode.c b/fs/befs/inode.c index d41c9247ae8a..94c17f9a9576 100644 --- a/fs/befs/inode.c +++ b/fs/befs/inode.c @@ -8,7 +8,6 @@ #include "befs.h" #include "inode.h" -#include "endian.h" /* Validates the correctness of the befs inode diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 57020c7a7e65..07f7144f0e2e 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -22,7 +22,6 @@ #include "datastream.h" #include "super.h" #include "io.h" -#include "endian.h" MODULE_DESCRIPTION("BeOS File System (BeFS) driver"); MODULE_AUTHOR("Will Dyson"); diff --git a/fs/befs/super.c b/fs/befs/super.c index 4557acbac528..8c3401ff6d6a 100644 --- a/fs/befs/super.c +++ b/fs/befs/super.c @@ -11,7 +11,6 @@ #include "befs.h" #include "super.h" -#include "endian.h" /** * load_befs_sb -- Read from disk and properly byteswap all the fields @@ -79,7 +79,6 @@ static struct bio_set *fs_bio_set; static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) { struct bio_vec *bvl; - struct biovec_slab *bp; /* * see comment near bvec_array define! @@ -98,10 +97,12 @@ static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned lon * idx now points to the pool we want to allocate from */ - bp = bvec_slabs + *idx; bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) + if (bvl) { + struct biovec_slab *bp = bvec_slabs + *idx; + memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec)); + } return bvl; } @@ -166,7 +167,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (likely(nr_iovecs)) { - unsigned long idx; + unsigned long idx = 0; /* shut up gcc */ bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); if (unlikely(!bvl)) { diff --git a/fs/buffer.c b/fs/buffer.c index 16cfbcd254f1..f65ef8821c73 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -701,7 +701,10 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); */ int __set_page_dirty_buffers(struct page *page) { - struct address_space * const mapping = page->mapping; + struct address_space * const mapping = page_mapping(page); + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); spin_lock(&mapping->private_lock); if (page_has_buffers(page)) { @@ -1039,8 +1042,21 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) } while ((size << sizebits) < PAGE_SIZE); index = block >> sizebits; - block = index << sizebits; + /* + * Check for a block which wants to lie outside our maximum possible + * pagecache index. (this comparison is done using sector_t types). + */ + if (unlikely(index != block >> sizebits)) { + char b[BDEVNAME_SIZE]; + + printk(KERN_ERR "%s: requested out-of-range block %llu for " + "device %s\n", + __FUNCTION__, (unsigned long long)block, + bdevname(bdev, b)); + return -EIO; + } + block = index << sizebits; /* Create a page with the proper size buffers.. */ page = grow_dev_page(bdev, block, index, size); if (!page) @@ -1067,12 +1083,16 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) for (;;) { struct buffer_head * bh; + int ret; bh = __find_get_block(bdev, block, size); if (bh) return bh; - if (!grow_buffers(bdev, block, size)) + ret = grow_buffers(bdev, block, size); + if (ret < 0) + return NULL; + if (ret == 0) free_more_memory(); } } @@ -1834,6 +1854,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page, clear_buffer_new(bh); kaddr = kmap_atomic(page, KM_USER0); memset(kaddr+block_start, 0, bh->b_size); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -2340,6 +2361,7 @@ failed: */ kaddr = kmap_atomic(page, KM_USER0); memset(kaddr, 0, PAGE_CACHE_SIZE); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); SetPageUptodate(page); set_page_dirty(page); diff --git a/fs/compat.c b/fs/compat.c index 4d3fbcb2ddb1..50624d4a70c6 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1316,7 +1316,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32, unsigned int nr_segs, unsigned int flags) { unsigned i; - struct iovec *iov; + struct iovec __user *iov; if (nr_segs > UIO_MAXIOV) return -EINVAL; iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec)); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 27ca1aa30562..a91f2628c981 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2438,13 +2438,17 @@ HANDLE_IOCTL(0x1260, broken_blkgetsize) HANDLE_IOCTL(BLKFRAGET, w_long) HANDLE_IOCTL(BLKSECTGET, w_long) HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_WCACHE, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_ACOUSTIC, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_ADDRESS, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_BUSSTATE, hdio_ioctl_trans) HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans) HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans) HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans) diff --git a/fs/dcache.c b/fs/dcache.c index 2355bddad8de..2bac4ba1d1d3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -549,6 +549,136 @@ repeat: } /* + * destroy a single subtree of dentries for unmount + * - see the comments on shrink_dcache_for_umount() for a description of the + * locking + */ +static void shrink_dcache_for_umount_subtree(struct dentry *dentry) +{ + struct dentry *parent; + + BUG_ON(!IS_ROOT(dentry)); + + /* detach this root from the system */ + spin_lock(&dcache_lock); + if (!list_empty(&dentry->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } + __d_drop(dentry); + spin_unlock(&dcache_lock); + + for (;;) { + /* descend to the first leaf in the current subtree */ + while (!list_empty(&dentry->d_subdirs)) { + struct dentry *loop; + + /* this is a branch with children - detach all of them + * from the system in one go */ + spin_lock(&dcache_lock); + list_for_each_entry(loop, &dentry->d_subdirs, + d_u.d_child) { + if (!list_empty(&loop->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&loop->d_lru); + } + + __d_drop(loop); + cond_resched_lock(&dcache_lock); + } + spin_unlock(&dcache_lock); + + /* move to the first child */ + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } + + /* consume the dentries from this leaf up through its parents + * until we find one with children or run out altogether */ + do { + struct inode *inode; + + if (atomic_read(&dentry->d_count) != 0) { + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + atomic_read(&dentry->d_count), + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } + + parent = dentry->d_parent; + if (parent == dentry) + parent = NULL; + else + atomic_dec(&parent->d_count); + + list_del(&dentry->d_u.d_child); + dentry_stat.nr_dentry--; /* For d_free, below */ + + inode = dentry->d_inode; + if (inode) { + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); + } + + d_free(dentry); + + /* finished when we fall off the top of the tree, + * otherwise we ascend to the parent and move to the + * next sibling if there is one */ + if (!parent) + return; + + dentry = parent; + + } while (list_empty(&dentry->d_subdirs)); + + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } +} + +/* + * destroy the dentries attached to a superblock on unmounting + * - we don't need to use dentry->d_lock, and only need dcache_lock when + * removing the dentry from the system lists and hashes because: + * - the superblock is detached from all mountings and open files, so the + * dentry trees will not be rearranged by the VFS + * - s_umount is write-locked, so the memory pressure shrinker will ignore + * any dentries belonging to this superblock that it comes across + * - the filesystem itself is no longer permitted to rearrange the dentries + * in this superblock + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + atomic_dec(&dentry->d_count); + shrink_dcache_for_umount_subtree(dentry); + + while (!hlist_empty(&sb->s_anon)) { + dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + shrink_dcache_for_umount_subtree(dentry); + } +} + +/* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs * list is non-empty and continue searching. diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 23f5ce12080b..7bcea7c5addb 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -174,7 +174,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) return 0; } -static struct nodeinfo *nodeid2nodeinfo(int nodeid, int alloc) +static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc) { struct nodeinfo *ni; int r; @@ -726,7 +726,7 @@ static int init_sock(void) } -static struct writequeue_entry *new_writequeue_entry(int allocation) +static struct writequeue_entry *new_writequeue_entry(gfp_t allocation) { struct writequeue_entry *entry; @@ -748,7 +748,7 @@ static struct writequeue_entry *new_writequeue_entry(int allocation) return entry; } -void *dlm_lowcomms_get_buffer(int nodeid, int len, int allocation, char **ppc) +void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) { struct writequeue_entry *e; int offset = 0; diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 6c04bb09cfa8..2d045e0daae1 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -19,7 +19,7 @@ void dlm_lowcomms_exit(void); int dlm_lowcomms_start(void); void dlm_lowcomms_stop(void); int dlm_lowcomms_close(int nodeid); -void *dlm_lowcomms_get_buffer(int nodeid, int len, int allocation, char **ppc); +void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); void dlm_lowcomms_commit_buffer(void *mh); #endif /* __LOWCOMMS_DOT_H__ */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 557d5b614fae..ae228ec54e94 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -105,6 +105,8 @@ /* Maximum msec timeout value storeable in a long int */ #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) +#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) + struct epoll_filefd { struct file *file; @@ -497,7 +499,7 @@ void eventpoll_release_file(struct file *file) */ asmlinkage long sys_epoll_create(int size) { - int error, fd; + int error, fd = -1; struct eventpoll *ep; struct inode *inode; struct file *file; @@ -640,7 +642,6 @@ eexit_1: return error; } -#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) /* * Implement the event wait interface for the eventpoll file. It is the kernel @@ -657,7 +658,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, current, epfd, events, maxevents, timeout)); /* The maximum number of event must be greater than zero */ - if (maxevents <= 0 || maxevents > MAX_EVENTS) + if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) return -EINVAL; /* Verify that the area passed by the user is writeable */ @@ -699,6 +700,55 @@ eexit_1: } +#ifdef TIF_RESTORE_SIGMASK + +/* + * Implement the event wait interface for the eventpoll file. It is the kernel + * part of the user space epoll_pwait(2). + */ +asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, const sigset_t __user *sigmask, + size_t sigsetsize) +{ + int error; + sigset_t ksigmask, sigsaved; + + /* + * If the caller wants a certain signal mask to be set during the wait, + * we apply it here. + */ + if (sigmask) { + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) + return -EFAULT; + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + error = sys_epoll_wait(epfd, events, maxevents, timeout); + + /* + * If we changed the signal mask, we need to restore the original one. + * In case we've got a signal while waiting, we do not restore the + * signal mask yet, and we allow do_signal() to deliver the signal on + * the way back to userspace, before the signal mask is restored. + */ + if (sigmask) { + if (error == -EINTR) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_thread_flag(TIF_RESTORE_SIGMASK); + } else + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + } + + return error; +} + +#endif /* #ifdef TIF_RESTORE_SIGMASK */ + + /* * Creates the file descriptor to be used by the epoll interface. */ diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 513cd421ac0b..d8b9abd95d07 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -364,7 +364,6 @@ static int parse_options (char * options, { char * p; substring_t args[MAX_OPT_ARGS]; - unsigned long kind = EXT2_MOUNT_ERRORS_CONT; int option; if (!options) @@ -404,13 +403,19 @@ static int parse_options (char * options, /* *sb_block = match_int(&args[0]); */ break; case Opt_err_panic: - kind = EXT2_MOUNT_ERRORS_PANIC; + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_RO); + set_opt (sbi->s_mount_opt, ERRORS_PANIC); break; case Opt_err_ro: - kind = EXT2_MOUNT_ERRORS_RO; + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_RO); break; case Opt_err_cont: - kind = EXT2_MOUNT_ERRORS_CONT; + clear_opt (sbi->s_mount_opt, ERRORS_RO); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_CONT); break; case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); @@ -489,7 +494,6 @@ static int parse_options (char * options, return 0; } } - sbi->s_mount_opt |= kind; return 1; } @@ -715,6 +719,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, ERRORS_PANIC); else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO) set_opt(sbi->s_mount_opt, ERRORS_RO); + else + set_opt(sbi->s_mount_opt, ERRORS_CONT); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8bfd56ef18ca..afc2d4f42d77 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1470,6 +1470,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, ERRORS_PANIC); else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO) set_opt(sbi->s_mount_opt, ERRORS_RO); + else + set_opt(sbi->s_mount_opt, ERRORS_CONT); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile new file mode 100644 index 000000000000..a6acb96ebeb9 --- /dev/null +++ b/fs/ext4/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the linux ext4-filesystem routines. +# + +obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o + +ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o + +ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o +ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c new file mode 100644 index 000000000000..9e882546d91a --- /dev/null +++ b/fs/ext4/acl.c @@ -0,0 +1,551 @@ +/* + * linux/fs/ext4/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/ext4_jbd2.h> +#include <linux/ext4_fs.h> +#include "xattr.h" +#include "acl.h" + +/* + * Convert from filesystem to in-memory representation. + */ +static struct posix_acl * +ext4_acl_from_disk(const void *value, size_t size) +{ + const char *end = (char *)value + size; + int n, count; + struct posix_acl *acl; + + if (!value) + return NULL; + if (size < sizeof(ext4_acl_header)) + return ERR_PTR(-EINVAL); + if (((ext4_acl_header *)value)->a_version != + cpu_to_le32(EXT4_ACL_VERSION)) + return ERR_PTR(-EINVAL); + value = (char *)value + sizeof(ext4_acl_header); + count = ext4_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + for (n=0; n < count; n++) { + ext4_acl_entry *entry = + (ext4_acl_entry *)value; + if ((char *)value + sizeof(ext4_acl_entry_short) > end) + goto fail; + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(ext4_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(ext4_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_id = + le32_to_cpu(entry->e_id); + break; + + default: + goto fail; + } + } + if (value != end) + goto fail; + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +/* + * Convert from in-memory to filesystem representation. + */ +static void * +ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + ext4_acl_header *ext_acl; + char *e; + size_t n; + + *size = ext4_acl_size(acl->a_count); + ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * + sizeof(ext4_acl_entry), GFP_KERNEL); + if (!ext_acl) + return ERR_PTR(-ENOMEM); + ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); + e = (char *)ext_acl + sizeof(ext4_acl_header); + for (n=0; n < acl->a_count; n++) { + ext4_acl_entry *entry = (ext4_acl_entry *)e; + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = + cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(ext4_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(ext4_acl_entry_short); + break; + + default: + goto fail; + } + } + return (char *)ext_acl; + +fail: + kfree(ext_acl); + return ERR_PTR(-EINVAL); +} + +static inline struct posix_acl * +ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT4_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT4_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT4_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +/* + * Inode operation get_posix_acl(). + * + * inode->i_mutex: don't care + */ +static struct posix_acl * +ext4_get_acl(struct inode *inode, int type) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + int name_index; + char *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return NULL; + + switch(type) { + case ACL_TYPE_ACCESS: + acl = ext4_iget_acl(inode, &ei->i_acl); + if (acl != EXT4_ACL_NOT_CACHED) + return acl; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + acl = ext4_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT4_ACL_NOT_CACHED) + return acl; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + + default: + return ERR_PTR(-EINVAL); + } + retval = ext4_xattr_get(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = ext4_xattr_get(inode, name_index, "", value, retval); + } + if (retval > 0) + acl = ext4_acl_from_disk(value, retval); + else if (retval == -ENODATA || retval == -ENOSYS) + acl = NULL; + else + acl = ERR_PTR(retval); + kfree(value); + + if (!IS_ERR(acl)) { + switch(type) { + case ACL_TYPE_ACCESS: + ext4_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext4_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return acl; +} + +/* + * Set the access or default ACL of an inode. + * + * inode->i_mutex: down unless called from ext4_new_inode + */ +static int +ext4_set_acl(handle_t *handle, struct inode *inode, int type, + struct posix_acl *acl) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + int name_index; + void *value = NULL; + size_t size = 0; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch(type) { + case ACL_TYPE_ACCESS: + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + ext4_mark_inode_dirty(handle, inode); + if (error == 0) + acl = NULL; + } + } + break; + + case ACL_TYPE_DEFAULT: + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + if (acl) { + value = ext4_acl_to_disk(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + } + + error = ext4_xattr_set_handle(handle, inode, name_index, "", + value, size, 0); + + kfree(value); + if (!error) { + switch(type) { + case ACL_TYPE_ACCESS: + ext4_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext4_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return error; +} + +static int +ext4_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + + return -EAGAIN; +} + +int +ext4_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, ext4_check_acl); +} + +/* + * Initialize the ACLs of a new inode. Called from ext4_new_inode. + * + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) + */ +int +ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(dir->i_sb, POSIX_ACL)) { + acl = ext4_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + error = ext4_set_acl(handle, inode, + ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + + mode = inode->i_mode; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext4_set_acl(handle, inode, + ACL_TYPE_ACCESS, clone); + } + } + posix_acl_release(clone); + } +cleanup: + posix_acl_release(acl); + return error; +} + +/* + * Does chmod for an inode that may have an Access Control List. The + * inode->i_mode field must be updated to the desired value by the caller + * before calling this function. + * Returns 0 on success, or a negative error number. + * + * We change the ACL rather than storing some ACL entries in the file + * mode permission bits (which would be more efficient), because that + * would break once additional permissions (like ACL_APPEND, ACL_DELETE + * for directories) are added. There are no more bits available in the + * file mode. + * + * inode->i_mutex: down + */ +int +ext4_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) { + handle_t *handle; + int retries = 0; + + retry: + handle = ext4_journal_start(inode, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + ext4_std_error(inode->i_sb, error); + goto out; + } + error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); + ext4_journal_stop(handle); + if (error == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + } +out: + posix_acl_release(clone); + return error; +} + +/* + * Extended attribute handlers + */ +static size_t +ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, + const char *name, size_t name_len) +{ + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); + return size; +} + +static size_t +ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, + const char *name, size_t name_len) +{ + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); + return size; +} + +static int +ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = ext4_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int +ext4_xattr_get_acl_access(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int +ext4_xattr_get_acl_default(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int +ext4_xattr_set_acl(struct inode *inode, int type, const void *value, + size_t size) +{ + handle_t *handle; + struct posix_acl *acl; + int error, retries = 0; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else + acl = NULL; + +retry: + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + error = ext4_set_acl(handle, inode, type, acl); + ext4_journal_stop(handle); + if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + +release_and_out: + posix_acl_release(acl); + return error; +} + +static int +ext4_xattr_set_acl_access(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); +} + +static int +ext4_xattr_set_acl_default(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +struct xattr_handler ext4_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = ext4_xattr_list_acl_access, + .get = ext4_xattr_get_acl_access, + .set = ext4_xattr_set_acl_access, +}; + +struct xattr_handler ext4_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = ext4_xattr_list_acl_default, + .get = ext4_xattr_get_acl_default, + .set = ext4_xattr_set_acl_default, +}; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h new file mode 100644 index 000000000000..26a5c1abf147 --- /dev/null +++ b/fs/ext4/acl.h @@ -0,0 +1,81 @@ +/* + File: fs/ext4/acl.h + + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#include <linux/posix_acl_xattr.h> + +#define EXT4_ACL_VERSION 0x0001 + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} ext4_acl_entry; + +typedef struct { + __le16 e_tag; + __le16 e_perm; +} ext4_acl_entry_short; + +typedef struct { + __le32 a_version; +} ext4_acl_header; + +static inline size_t ext4_acl_size(int count) +{ + if (count <= 4) { + return sizeof(ext4_acl_header) + + count * sizeof(ext4_acl_entry_short); + } else { + return sizeof(ext4_acl_header) + + 4 * sizeof(ext4_acl_entry_short) + + (count - 4) * sizeof(ext4_acl_entry); + } +} + +static inline int ext4_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(ext4_acl_header); + s = size - 4 * sizeof(ext4_acl_entry_short); + if (s < 0) { + if (size % sizeof(ext4_acl_entry_short)) + return -1; + return size / sizeof(ext4_acl_entry_short); + } else { + if (s % sizeof(ext4_acl_entry)) + return -1; + return s / sizeof(ext4_acl_entry) + 4; + } +} + +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + +/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl + if the ACL has not been cached */ +#define EXT4_ACL_NOT_CACHED ((void *)-1) + +/* acl.c */ +extern int ext4_permission (struct inode *, int, struct nameidata *); +extern int ext4_acl_chmod (struct inode *); +extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); + +#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ +#include <linux/sched.h> +#define ext4_permission NULL + +static inline int +ext4_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int +ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ + diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c new file mode 100644 index 000000000000..5d45582f9517 --- /dev/null +++ b/fs/ext4/balloc.c @@ -0,0 +1,1833 @@ +/* + * linux/fs/ext4/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/time.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd2.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> + +/* + * balloc.c contains the blocks allocation and deallocation routines + */ + +/* + * Calculate the block group number and offset, given a block number + */ +void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, + unsigned long *blockgrpp, ext4_grpblk_t *offsetp) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + ext4_grpblk_t offset; + + blocknr = blocknr - le32_to_cpu(es->s_first_data_block); + offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); + if (offsetp) + *offsetp = offset; + if (blockgrpp) + *blockgrpp = blocknr; + +} + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext4_read_super). + */ + + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +/** + * ext4_get_group_desc() -- load group descriptor from disk + * @sb: super block + * @block_group: given block group + * @bh: pointer to the buffer head to store the block + * group descriptor + */ +struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh) +{ + unsigned long group_desc; + unsigned long offset; + struct ext4_group_desc * desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (block_group >= sbi->s_groups_count) { + ext4_error (sb, "ext4_get_group_desc", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); + + return NULL; + } + smp_rmb(); + + group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); + offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); + if (!sbi->s_group_desc[group_desc]) { + ext4_error (sb, "ext4_get_group_desc", + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); + return NULL; + } + + desc = (struct ext4_group_desc *)( + (__u8 *)sbi->s_group_desc[group_desc]->b_data + + offset * EXT4_DESC_SIZE(sb)); + if (bh) + *bh = sbi->s_group_desc[group_desc]; + return desc; +} + +/** + * read_block_bitmap() + * @sb: super block + * @block_group: given block group + * + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. + * + * Return buffer_head on success or NULL in case of failure. + */ +static struct buffer_head * +read_block_bitmap(struct super_block *sb, unsigned int block_group) +{ + struct ext4_group_desc * desc; + struct buffer_head * bh = NULL; + + desc = ext4_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; + bh = sb_bread(sb, ext4_block_bitmap(sb, desc)); + if (!bh) + ext4_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %llu", + block_group, + ext4_block_bitmap(sb, desc)); +error_out: + return bh; +} +/* + * The reservation window structure operations + * -------------------------------------------- + * Operations include: + * dump, find, add, remove, is_empty, find_next_reservable_window, etc. + * + * We use a red-black tree to represent per-filesystem reservation + * windows. + * + */ + +/** + * __rsv_window_dump() -- Dump the filesystem block allocation reservation map + * @rb_root: root of per-filesystem reservation rb tree + * @verbose: verbose mode + * @fn: function which wishes to dump the reservation map + * + * If verbose is turned on, it will print the whole block reservation + * windows(start, end). Otherwise, it will only print out the "bad" windows, + * those windows that overlap with their immediate neighbors. + */ +#if 1 +static void __rsv_window_dump(struct rb_root *root, int verbose, + const char *fn) +{ + struct rb_node *n; + struct ext4_reserve_window_node *rsv, *prev; + int bad; + +restart: + n = rb_first(root); + bad = 0; + prev = NULL; + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + while (n) { + rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node); + if (verbose) + printk("reservation window 0x%p " + "start: %llu, end: %llu\n", + rsv, rsv->rsv_start, rsv->rsv_end); + if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { + printk("Bad reservation %p (start >= end)\n", + rsv); + bad = 1; + } + if (prev && prev->rsv_end >= rsv->rsv_start) { + printk("Bad reservation %p (prev->end >= start)\n", + rsv); + bad = 1; + } + if (bad) { + if (!verbose) { + printk("Restarting reservation walk in verbose mode\n"); + verbose = 1; + goto restart; + } + } + n = rb_next(n); + prev = rsv; + } + printk("Window map complete.\n"); + if (bad) + BUG(); +} +#define rsv_window_dump(root, verbose) \ + __rsv_window_dump((root), (verbose), __FUNCTION__) +#else +#define rsv_window_dump(root, verbose) do {} while (0) +#endif + +/** + * goal_in_my_reservation() + * @rsv: inode's reservation window + * @grp_goal: given goal block relative to the allocation block group + * @group: the current allocation block group + * @sb: filesystem super block + * + * Test if the given goal block (group relative) is within the file's + * own block reservation window range. + * + * If the reservation window is outside the goal allocation group, return 0; + * grp_goal (given goal block) could be -1, which means no specific + * goal block. In this case, always return 1. + * If the goal block is within the reservation window, return 1; + * otherwise, return 0; + */ +static int +goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, + unsigned int group, struct super_block * sb) +{ + ext4_fsblk_t group_first_block, group_last_block; + + group_first_block = ext4_group_first_block_no(sb, group); + group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + + if ((rsv->_rsv_start > group_last_block) || + (rsv->_rsv_end < group_first_block)) + return 0; + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) + || (grp_goal + group_first_block > rsv->_rsv_end))) + return 0; + return 1; +} + +/** + * search_reserve_window() + * @rb_root: root of reservation tree + * @goal: target allocation block + * + * Find the reserved window which includes the goal, or the previous one + * if the goal is not in any window. + * Returns NULL if there are no windows or if all windows start after the goal. + */ +static struct ext4_reserve_window_node * +search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) +{ + struct rb_node *n = root->rb_node; + struct ext4_reserve_window_node *rsv; + + if (!n) + return NULL; + + do { + rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); + + if (goal < rsv->rsv_start) + n = n->rb_left; + else if (goal > rsv->rsv_end) + n = n->rb_right; + else + return rsv; + } while (n); + /* + * We've fallen off the end of the tree: the goal wasn't inside + * any particular node. OK, the previous node must be to one + * side of the interval containing the goal. If it's the RHS, + * we need to back up one. + */ + if (rsv->rsv_start > goal) { + n = rb_prev(&rsv->rsv_node); + rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); + } + return rsv; +} + +/** + * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. + * @sb: super block + * @rsv: reservation window to add + * + * Must be called with rsv_lock hold. + */ +void ext4_rsv_window_add(struct super_block *sb, + struct ext4_reserve_window_node *rsv) +{ + struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; + struct rb_node *node = &rsv->rsv_node; + ext4_fsblk_t start = rsv->rsv_start; + + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct ext4_reserve_window_node *this; + + while (*p) + { + parent = *p; + this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); + + if (start < this->rsv_start) + p = &(*p)->rb_left; + else if (start > this->rsv_end) + p = &(*p)->rb_right; + else { + rsv_window_dump(root, 1); + BUG(); + } + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); +} + +/** + * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree + * @sb: super block + * @rsv: reservation window to remove + * + * Mark the block reservation window as not allocated, and unlink it + * from the filesystem reservation window rb tree. Must be called with + * rsv_lock hold. + */ +static void rsv_window_remove(struct super_block *sb, + struct ext4_reserve_window_node *rsv) +{ + rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_alloc_hit = 0; + rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); +} + +/* + * rsv_is_empty() -- Check if the reservation window is allocated. + * @rsv: given reservation window to check + * + * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. + */ +static inline int rsv_is_empty(struct ext4_reserve_window *rsv) +{ + /* a valid reservation end block could not be 0 */ + return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; +} + +/** + * ext4_init_block_alloc_info() + * @inode: file inode structure + * + * Allocate and initialize the reservation window structure, and + * link the window to the ext4 inode structure at last + * + * The reservation window structure is only dynamically allocated + * and linked to ext4 inode the first time the open file + * needs a new block. So, before every ext4_new_block(s) call, for + * regular files, we should check whether the reservation window + * structure exists or not. In the latter case, this function is called. + * Fail to do so will result in block reservation being turned off for that + * open file. + * + * This function is called from ext4_get_blocks_handle(), also called + * when setting the reservation window size through ioctl before the file + * is open for write (needs block allocation). + * + * Needs truncate_mutex protection prior to call this function. + */ +void ext4_init_block_alloc_info(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; + struct super_block *sb = inode->i_sb; + + block_i = kmalloc(sizeof(*block_i), GFP_NOFS); + if (block_i) { + struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; + + rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + + /* + * if filesystem is mounted with NORESERVATION, the goal + * reservation window size is set to zero to indicate + * block reservation is off + */ + if (!test_opt(sb, RESERVATION)) + rsv->rsv_goal_size = 0; + else + rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; + rsv->rsv_alloc_hit = 0; + block_i->last_alloc_logical_block = 0; + block_i->last_alloc_physical_block = 0; + } + ei->i_block_alloc_info = block_i; +} + +/** + * ext4_discard_reservation() + * @inode: inode + * + * Discard(free) block reservation window on last file close, or truncate + * or at last iput(). + * + * It is being called in three cases: + * ext4_release_file(): last writer close the file + * ext4_clear_inode(): last iput(), when nobody link to this file. + * ext4_truncate(): when the block indirect map is about to change. + * + */ +void ext4_discard_reservation(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; + struct ext4_reserve_window_node *rsv; + spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; + + if (!block_i) + return; + + rsv = &block_i->rsv_window_node; + if (!rsv_is_empty(&rsv->rsv_window)) { + spin_lock(rsv_lock); + if (!rsv_is_empty(&rsv->rsv_window)) + rsv_window_remove(inode->i_sb, rsv); + spin_unlock(rsv_lock); + } +} + +/** + * ext4_free_blocks_sb() -- Free given blocks and update quota + * @handle: handle to this transaction + * @sb: super block + * @block: start physcial block to free + * @count: number of blocks to free + * @pdquot_freed_blocks: pointer to quota + */ +void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gd_bh; + unsigned long block_group; + ext4_grpblk_t bit; + unsigned long i; + unsigned long overflow; + struct ext4_group_desc * desc; + struct ext4_super_block * es; + struct ext4_sb_info *sbi; + int err = 0, ret; + ext4_grpblk_t group_freed; + + *pdquot_freed_blocks = 0; + sbi = EXT4_SB(sb); + es = sbi->s_es; + if (block < le32_to_cpu(es->s_first_data_block) || + block + count < block || + block + count > ext4_blocks_count(es)) { + ext4_error (sb, "ext4_free_blocks", + "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); + goto error_return; + } + + ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); + +do_more: + overflow = 0; + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); + /* + * Check to see if we are freeing blocks across a group + * boundary. + */ + if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { + overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); + count -= overflow; + } + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + desc = ext4_get_group_desc (sb, block_group, &gd_bh); + if (!desc) + goto error_return; + + if (in_range(ext4_block_bitmap(sb, desc), block, count) || + in_range(ext4_inode_bitmap(sb, desc), block, count) || + in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || + in_range(block + count - 1, ext4_inode_table(sb, desc), + sbi->s_itb_per_group)) + ext4_error (sb, "ext4_free_blocks", + "Freeing blocks in system zones - " + "Block = %llu, count = %lu", + block, count); + + /* + * We are about to start releasing blocks in the bitmap, + * so we need undo access. + */ + /* @@@ check errors */ + BUFFER_TRACE(bitmap_bh, "getting undo access"); + err = ext4_journal_get_undo_access(handle, bitmap_bh); + if (err) + goto error_return; + + /* + * We are about to modify some metadata. Call the journal APIs + * to unshare ->b_data if a currently-committing transaction is + * using it + */ + BUFFER_TRACE(gd_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gd_bh); + if (err) + goto error_return; + + jbd_lock_bh_state(bitmap_bh); + + for (i = 0, group_freed = 0; i < count; i++) { + /* + * An HJ special. This is expensive... + */ +#ifdef CONFIG_JBD_DEBUG + jbd_unlock_bh_state(bitmap_bh); + { + struct buffer_head *debug_bh; + debug_bh = sb_find_get_block(sb, block + i); + if (debug_bh) { + BUFFER_TRACE(debug_bh, "Deleted!"); + if (!bh2jh(bitmap_bh)->b_committed_data) + BUFFER_TRACE(debug_bh, + "No commited data in bitmap"); + BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); + __brelse(debug_bh); + } + } + jbd_lock_bh_state(bitmap_bh); +#endif + if (need_resched()) { + jbd_unlock_bh_state(bitmap_bh); + cond_resched(); + jbd_lock_bh_state(bitmap_bh); + } + /* @@@ This prevents newly-allocated data from being + * freed and then reallocated within the same + * transaction. + * + * Ideally we would want to allow that to happen, but to + * do so requires making jbd2_journal_forget() capable of + * revoking the queued write of a data block, which + * implies blocking on the journal lock. *forget() + * cannot block due to truncate races. + * + * Eventually we can fix this by making jbd2_journal_forget() + * return a status indicating whether or not it was able + * to revoke the buffer. On successful revoke, it is + * safe not to set the allocation bit in the committed + * bitmap, because we know that there is no outstanding + * activity on the buffer any more and so it is safe to + * reallocate it. + */ + BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); + J_ASSERT_BH(bitmap_bh, + bh2jh(bitmap_bh)->b_committed_data != NULL); + ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, + bh2jh(bitmap_bh)->b_committed_data); + + /* + * We clear the bit in the bitmap after setting the committed + * data bit, because this is the reverse order to that which + * the allocator uses. + */ + BUFFER_TRACE(bitmap_bh, "clear bit"); + if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + bit + i, bitmap_bh->b_data)) { + jbd_unlock_bh_state(bitmap_bh); + ext4_error(sb, __FUNCTION__, + "bit already cleared for block %llu", + (ext4_fsblk_t)(block + i)); + jbd_lock_bh_state(bitmap_bh); + BUFFER_TRACE(bitmap_bh, "bit already cleared"); + } else { + group_freed++; + } + } + jbd_unlock_bh_state(bitmap_bh); + + spin_lock(sb_bgl_lock(sbi, block_group)); + desc->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + + group_freed); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + + /* We dirtied the bitmap block */ + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); + + /* And the group descriptor block */ + BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); + ret = ext4_journal_dirty_metadata(handle, gd_bh); + if (!err) err = ret; + *pdquot_freed_blocks += group_freed; + + if (overflow && !err) { + block += count; + count = overflow; + goto do_more; + } + sb->s_dirt = 1; +error_return: + brelse(bitmap_bh); + ext4_std_error(sb, err); + return; +} + +/** + * ext4_free_blocks() -- Free given blocks and update quota + * @handle: handle for this transaction + * @inode: inode + * @block: start physical block to free + * @count: number of blocks to count + */ +void ext4_free_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count) +{ + struct super_block * sb; + unsigned long dquot_freed_blocks; + + sb = inode->i_sb; + if (!sb) { + printk ("ext4_free_blocks: nonexistent device"); + return; + } + ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); + if (dquot_freed_blocks) + DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + return; +} + +/** + * ext4_test_allocatable() + * @nr: given allocation block group + * @bh: bufferhead contains the bitmap of the given block group + * + * For ext4 allocations, we must not reuse any blocks which are + * allocated in the bitmap buffer's "last committed data" copy. This + * prevents deletes from freeing up the page for reuse until we have + * committed the delete transaction. + * + * If we didn't do this, then deleting something and reallocating it as + * data would allow the old block to be overwritten before the + * transaction committed (because we force data to disk before commit). + * This would lead to corruption if we crashed between overwriting the + * data and committing the delete. + * + * @@@ We may want to make this allocation behaviour conditional on + * data-writes at some point, and disable it for metadata allocations or + * sync-data inodes. + */ +static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) +{ + int ret; + struct journal_head *jh = bh2jh(bh); + + if (ext4_test_bit(nr, bh->b_data)) + return 0; + + jbd_lock_bh_state(bh); + if (!jh->b_committed_data) + ret = 1; + else + ret = !ext4_test_bit(nr, jh->b_committed_data); + jbd_unlock_bh_state(bh); + return ret; +} + +/** + * bitmap_search_next_usable_block() + * @start: the starting block (group relative) of the search + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) of the reservation + * + * The bitmap search --- search forward alternately through the actual + * bitmap on disk and the last-committed copy in journal, until we find a + * bit free in both bitmaps. + */ +static ext4_grpblk_t +bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, + ext4_grpblk_t maxblocks) +{ + ext4_grpblk_t next; + struct journal_head *jh = bh2jh(bh); + + while (start < maxblocks) { + next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); + if (next >= maxblocks) + return -1; + if (ext4_test_allocatable(next, bh)) + return next; + jbd_lock_bh_state(bh); + if (jh->b_committed_data) + start = ext4_find_next_zero_bit(jh->b_committed_data, + maxblocks, next); + jbd_unlock_bh_state(bh); + } + return -1; +} + +/** + * find_next_usable_block() + * @start: the starting block (group relative) to find next + * allocatable block in bitmap. + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) for the search + * + * Find an allocatable block in a bitmap. We honor both the bitmap and + * its last-committed copy (if that exists), and perform the "most + * appropriate allocation" algorithm of looking for a free block near + * the initial goal; then for a free byte somewhere in the bitmap; then + * for any free bit in the bitmap. + */ +static ext4_grpblk_t +find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, + ext4_grpblk_t maxblocks) +{ + ext4_grpblk_t here, next; + char *p, *r; + + if (start > 0) { + /* + * The goal was occupied; search forward for a free + * block within the next XX blocks. + * + * end_goal is more or less random, but it has to be + * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the + * next 64-bit boundary is simple.. + */ + ext4_grpblk_t end_goal = (start + 63) & ~63; + if (end_goal > maxblocks) + end_goal = maxblocks; + here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); + if (here < end_goal && ext4_test_allocatable(here, bh)) + return here; + ext4_debug("Bit not found near goal\n"); + } + + here = start; + if (here < 0) + here = 0; + + p = ((char *)bh->b_data) + (here >> 3); + r = memscan(p, 0, (maxblocks - here + 7) >> 3); + next = (r - ((char *)bh->b_data)) << 3; + + if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) + return next; + + /* + * The bitmap search --- search forward alternately through the actual + * bitmap and the last-committed copy until we find a bit free in + * both + */ + here = bitmap_search_next_usable_block(here, bh, maxblocks); + return here; +} + +/** + * claim_block() + * @block: the free block (group relative) to allocate + * @bh: the bufferhead containts the block group bitmap + * + * We think we can allocate this block in this bitmap. Try to set the bit. + * If that succeeds then check that nobody has allocated and then freed the + * block since we saw that is was not marked in b_committed_data. If it _was_ + * allocated and freed then clear the bit in the bitmap again and return + * zero (failure). + */ +static inline int +claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) +{ + struct journal_head *jh = bh2jh(bh); + int ret; + + if (ext4_set_bit_atomic(lock, block, bh->b_data)) + return 0; + jbd_lock_bh_state(bh); + if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { + ext4_clear_bit_atomic(lock, block, bh->b_data); + ret = 0; + } else { + ret = 1; + } + jbd_unlock_bh_state(bh); + return ret; +} + +/** + * ext4_try_to_allocate() + * @sb: superblock + * @handle: handle to this transaction + * @group: given allocation block group + * @bitmap_bh: bufferhead holds the block bitmap + * @grp_goal: given target block within the group + * @count: target number of blocks to allocate + * @my_rsv: reservation window + * + * Attempt to allocate blocks within a give range. Set the range of allocation + * first, then find the first free bit(s) from the bitmap (within the range), + * and at last, allocate the blocks by claiming the found free bit as allocated. + * + * To set the range of this allocation: + * if there is a reservation window, only try to allocate block(s) from the + * file's own reservation window; + * Otherwise, the allocation range starts from the give goal block, ends at + * the block group's last block. + * + * If we failed to allocate the desired block then we may end up crossing to a + * new bitmap. In that case we must release write access to the old one via + * ext4_journal_release_buffer(), else we'll run out of credits. + */ +static ext4_grpblk_t +ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group, + struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal, + unsigned long *count, struct ext4_reserve_window *my_rsv) +{ + ext4_fsblk_t group_first_block; + ext4_grpblk_t start, end; + unsigned long num = 0; + + /* we do allocation within the reservation window if we have a window */ + if (my_rsv) { + group_first_block = ext4_group_first_block_no(sb, group); + if (my_rsv->_rsv_start >= group_first_block) + start = my_rsv->_rsv_start - group_first_block; + else + /* reservation window cross group boundary */ + start = 0; + end = my_rsv->_rsv_end - group_first_block + 1; + if (end > EXT4_BLOCKS_PER_GROUP(sb)) + /* reservation window crosses group boundary */ + end = EXT4_BLOCKS_PER_GROUP(sb); + if ((start <= grp_goal) && (grp_goal < end)) + start = grp_goal; + else + grp_goal = -1; + } else { + if (grp_goal > 0) + start = grp_goal; + else + start = 0; + end = EXT4_BLOCKS_PER_GROUP(sb); + } + + BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); + +repeat: + if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { + grp_goal = find_next_usable_block(start, bitmap_bh, end); + if (grp_goal < 0) + goto fail_access; + if (!my_rsv) { + int i; + + for (i = 0; i < 7 && grp_goal > start && + ext4_test_allocatable(grp_goal - 1, + bitmap_bh); + i++, grp_goal--) + ; + } + } + start = grp_goal; + + if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), + grp_goal, bitmap_bh)) { + /* + * The block was allocated by another thread, or it was + * allocated and then freed by another thread + */ + start++; + grp_goal++; + if (start >= end) + goto fail_access; + goto repeat; + } + num++; + grp_goal++; + while (num < *count && grp_goal < end + && ext4_test_allocatable(grp_goal, bitmap_bh) + && claim_block(sb_bgl_lock(EXT4_SB(sb), group), + grp_goal, bitmap_bh)) { + num++; + grp_goal++; + } + *count = num; + return grp_goal - num; +fail_access: + *count = num; + return -1; +} + +/** + * find_next_reservable_window(): + * find a reservable space within the given range. + * It does not allocate the reservation window for now: + * alloc_new_reservation() will do the work later. + * + * @search_head: the head of the searching list; + * This is not necessarily the list head of the whole filesystem + * + * We have both head and start_block to assist the search + * for the reservable space. The list starts from head, + * but we will shift to the place where start_block is, + * then start from there, when looking for a reservable space. + * + * @size: the target new reservation window size + * + * @group_first_block: the first block we consider to start + * the real search from + * + * @last_block: + * the maximum block number that our goal reservable space + * could start from. This is normally the last block in this + * group. The search will end when we found the start of next + * possible reservable space is out of this boundary. + * This could handle the cross boundary reservation window + * request. + * + * basically we search from the given range, rather than the whole + * reservation double linked list, (start_block, last_block) + * to find a free region that is of my size and has not + * been reserved. + * + */ +static int find_next_reservable_window( + struct ext4_reserve_window_node *search_head, + struct ext4_reserve_window_node *my_rsv, + struct super_block * sb, + ext4_fsblk_t start_block, + ext4_fsblk_t last_block) +{ + struct rb_node *next; + struct ext4_reserve_window_node *rsv, *prev; + ext4_fsblk_t cur; + int size = my_rsv->rsv_goal_size; + + /* TODO: make the start of the reservation window byte-aligned */ + /* cur = *start_block & ~7;*/ + cur = start_block; + rsv = search_head; + if (!rsv) + return -1; + + while (1) { + if (cur <= rsv->rsv_end) + cur = rsv->rsv_end + 1; + + /* TODO? + * in the case we could not find a reservable space + * that is what is expected, during the re-search, we could + * remember what's the largest reservable space we could have + * and return that one. + * + * For now it will fail if we could not find the reservable + * space with expected-size (or more)... + */ + if (cur > last_block) + return -1; /* fail */ + + prev = rsv; + next = rb_next(&rsv->rsv_node); + rsv = list_entry(next,struct ext4_reserve_window_node,rsv_node); + + /* + * Reached the last reservation, we can just append to the + * previous one. + */ + if (!next) + break; + + if (cur + size <= rsv->rsv_start) { + /* + * Found a reserveable space big enough. We could + * have a reservation across the group boundary here + */ + break; + } + } + /* + * we come here either : + * when we reach the end of the whole list, + * and there is empty reservable space after last entry in the list. + * append it to the end of the list. + * + * or we found one reservable space in the middle of the list, + * return the reservation window that we could append to. + * succeed. + */ + + if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) + rsv_window_remove(sb, my_rsv); + + /* + * Let's book the whole avaliable window for now. We will check the + * disk bitmap later and then, if there are free blocks then we adjust + * the window size if it's larger than requested. + * Otherwise, we will remove this node from the tree next time + * call find_next_reservable_window. + */ + my_rsv->rsv_start = cur; + my_rsv->rsv_end = cur + size - 1; + my_rsv->rsv_alloc_hit = 0; + + if (prev != my_rsv) + ext4_rsv_window_add(sb, my_rsv); + + return 0; +} + +/** + * alloc_new_reservation()--allocate a new reservation window + * + * To make a new reservation, we search part of the filesystem + * reservation list (the list that inside the group). We try to + * allocate a new reservation window near the allocation goal, + * or the beginning of the group, if there is no goal. + * + * We first find a reservable space after the goal, then from + * there, we check the bitmap for the first free block after + * it. If there is no free block until the end of group, then the + * whole group is full, we failed. Otherwise, check if the free + * block is inside the expected reservable space, if so, we + * succeed. + * If the first free block is outside the reservable space, then + * start from the first free block, we search for next available + * space, and go on. + * + * on succeed, a new reservation will be found and inserted into the list + * It contains at least one free block, and it does not overlap with other + * reservation windows. + * + * failed: we failed to find a reservation window in this group + * + * @rsv: the reservation + * + * @grp_goal: The goal (group-relative). It is where the search for a + * free reservable space should start from. + * if we have a grp_goal(grp_goal >0 ), then start from there, + * no grp_goal(grp_goal = -1), we start from the first block + * of the group. + * + * @sb: the super block + * @group: the group we are trying to allocate in + * @bitmap_bh: the block group block bitmap + * + */ +static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, + ext4_grpblk_t grp_goal, struct super_block *sb, + unsigned int group, struct buffer_head *bitmap_bh) +{ + struct ext4_reserve_window_node *search_head; + ext4_fsblk_t group_first_block, group_end_block, start_block; + ext4_grpblk_t first_free_block; + struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; + unsigned long size; + int ret; + spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; + + group_first_block = ext4_group_first_block_no(sb, group); + group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + + if (grp_goal < 0) + start_block = group_first_block; + else + start_block = grp_goal + group_first_block; + + size = my_rsv->rsv_goal_size; + + if (!rsv_is_empty(&my_rsv->rsv_window)) { + /* + * if the old reservation is cross group boundary + * and if the goal is inside the old reservation window, + * we will come here when we just failed to allocate from + * the first part of the window. We still have another part + * that belongs to the next group. In this case, there is no + * point to discard our window and try to allocate a new one + * in this group(which will fail). we should + * keep the reservation window, just simply move on. + * + * Maybe we could shift the start block of the reservation + * window to the first block of next group. + */ + + if ((my_rsv->rsv_start <= group_end_block) && + (my_rsv->rsv_end > group_end_block) && + (start_block >= my_rsv->rsv_start)) + return -1; + + if ((my_rsv->rsv_alloc_hit > + (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { + /* + * if the previously allocation hit ratio is + * greater than 1/2, then we double the size of + * the reservation window the next time, + * otherwise we keep the same size window + */ + size = size * 2; + if (size > EXT4_MAX_RESERVE_BLOCKS) + size = EXT4_MAX_RESERVE_BLOCKS; + my_rsv->rsv_goal_size= size; + } + } + + spin_lock(rsv_lock); + /* + * shift the search start to the window near the goal block + */ + search_head = search_reserve_window(fs_rsv_root, start_block); + + /* + * find_next_reservable_window() simply finds a reservable window + * inside the given range(start_block, group_end_block). + * + * To make sure the reservation window has a free bit inside it, we + * need to check the bitmap after we found a reservable window. + */ +retry: + ret = find_next_reservable_window(search_head, my_rsv, sb, + start_block, group_end_block); + + if (ret == -1) { + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; + } + + /* + * On success, find_next_reservable_window() returns the + * reservation window where there is a reservable space after it. + * Before we reserve this reservable space, we need + * to make sure there is at least a free block inside this region. + * + * searching the first free bit on the block bitmap and copy of + * last committed bitmap alternatively, until we found a allocatable + * block. Search start from the start block of the reservable space + * we just found. + */ + spin_unlock(rsv_lock); + first_free_block = bitmap_search_next_usable_block( + my_rsv->rsv_start - group_first_block, + bitmap_bh, group_end_block - group_first_block + 1); + + if (first_free_block < 0) { + /* + * no free block left on the bitmap, no point + * to reserve the space. return failed. + */ + spin_lock(rsv_lock); + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; /* failed */ + } + + start_block = first_free_block + group_first_block; + /* + * check if the first free block is within the + * free space we just reserved + */ + if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end) + return 0; /* success */ + /* + * if the first free bit we found is out of the reservable space + * continue search for next reservable space, + * start from where the free block is, + * we also shift the list head to where we stopped last time + */ + search_head = my_rsv; + spin_lock(rsv_lock); + goto retry; +} + +/** + * try_to_extend_reservation() + * @my_rsv: given reservation window + * @sb: super block + * @size: the delta to extend + * + * Attempt to expand the reservation window large enough to have + * required number of free blocks + * + * Since ext4_try_to_allocate() will always allocate blocks within + * the reservation window range, if the window size is too small, + * multiple blocks allocation has to stop at the end of the reservation + * window. To make this more efficient, given the total number of + * blocks needed and the current size of the window, we try to + * expand the reservation window size if necessary on a best-effort + * basis before ext4_new_blocks() tries to allocate blocks, + */ +static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, + struct super_block *sb, int size) +{ + struct ext4_reserve_window_node *next_rsv; + struct rb_node *next; + spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; + + if (!spin_trylock(rsv_lock)) + return; + + next = rb_next(&my_rsv->rsv_node); + + if (!next) + my_rsv->rsv_end += size; + else { + next_rsv = list_entry(next, struct ext4_reserve_window_node, rsv_node); + + if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) + my_rsv->rsv_end += size; + else + my_rsv->rsv_end = next_rsv->rsv_start - 1; + } + spin_unlock(rsv_lock); +} + +/** + * ext4_try_to_allocate_with_rsv() + * @sb: superblock + * @handle: handle to this transaction + * @group: given allocation block group + * @bitmap_bh: bufferhead holds the block bitmap + * @grp_goal: given target block within the group + * @count: target number of blocks to allocate + * @my_rsv: reservation window + * @errp: pointer to store the error code + * + * This is the main function used to allocate a new block and its reservation + * window. + * + * Each time when a new block allocation is need, first try to allocate from + * its own reservation. If it does not have a reservation window, instead of + * looking for a free bit on bitmap first, then look up the reservation list to + * see if it is inside somebody else's reservation window, we try to allocate a + * reservation window for it starting from the goal first. Then do the block + * allocation within the reservation window. + * + * This will avoid keeping on searching the reservation list again and + * again when somebody is looking for a free block (without + * reservation), and there are lots of free blocks, but they are all + * being reserved. + * + * We use a red-black tree for the per-filesystem reservation list. + * + */ +static ext4_grpblk_t +ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, + unsigned int group, struct buffer_head *bitmap_bh, + ext4_grpblk_t grp_goal, + struct ext4_reserve_window_node * my_rsv, + unsigned long *count, int *errp) +{ + ext4_fsblk_t group_first_block, group_last_block; + ext4_grpblk_t ret = 0; + int fatal; + unsigned long num = *count; + + *errp = 0; + + /* + * Make sure we use undo access for the bitmap, because it is critical + * that we do the frozen_data COW on bitmap buffers in all cases even + * if the buffer is in BJ_Forget state in the committing transaction. + */ + BUFFER_TRACE(bitmap_bh, "get undo access for new block"); + fatal = ext4_journal_get_undo_access(handle, bitmap_bh); + if (fatal) { + *errp = fatal; + return -1; + } + + /* + * we don't deal with reservation when + * filesystem is mounted without reservation + * or the file is not a regular file + * or last attempt to allocate a block with reservation turned on failed + */ + if (my_rsv == NULL ) { + ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, + grp_goal, count, NULL); + goto out; + } + /* + * grp_goal is a group relative block number (if there is a goal) + * 0 < grp_goal < EXT4_BLOCKS_PER_GROUP(sb) + * first block is a filesystem wide block number + * first block is the block number of the first block in this group + */ + group_first_block = ext4_group_first_block_no(sb, group); + group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + + /* + * Basically we will allocate a new block from inode's reservation + * window. + * + * We need to allocate a new reservation window, if: + * a) inode does not have a reservation window; or + * b) last attempt to allocate a block from existing reservation + * failed; or + * c) we come here with a goal and with a reservation window + * + * We do not need to allocate a new reservation window if we come here + * at the beginning with a goal and the goal is inside the window, or + * we don't have a goal but already have a reservation window. + * then we could go to allocate from the reservation window directly. + */ + while (1) { + if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || + !goal_in_my_reservation(&my_rsv->rsv_window, + grp_goal, group, sb)) { + if (my_rsv->rsv_goal_size < *count) + my_rsv->rsv_goal_size = *count; + ret = alloc_new_reservation(my_rsv, grp_goal, sb, + group, bitmap_bh); + if (ret < 0) + break; /* failed */ + + if (!goal_in_my_reservation(&my_rsv->rsv_window, + grp_goal, group, sb)) + grp_goal = -1; + } else if (grp_goal > 0 && + (my_rsv->rsv_end-grp_goal+1) < *count) + try_to_extend_reservation(my_rsv, sb, + *count-my_rsv->rsv_end + grp_goal - 1); + + if ((my_rsv->rsv_start > group_last_block) || + (my_rsv->rsv_end < group_first_block)) { + rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); + BUG(); + } + ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, + grp_goal, &num, &my_rsv->rsv_window); + if (ret >= 0) { + my_rsv->rsv_alloc_hit += num; + *count = num; + break; /* succeed */ + } + num = *count; + } +out: + if (ret >= 0) { + BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " + "bitmap block"); + fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); + if (fatal) { + *errp = fatal; + return -1; + } + return ret; + } + + BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); + ext4_journal_release_buffer(handle, bitmap_bh); + return ret; +} + +/** + * ext4_has_free_blocks() + * @sbi: in-core super block structure. + * + * Check if filesystem has at least 1 free block available for allocation. + */ +static int ext4_has_free_blocks(struct ext4_sb_info *sbi) +{ + ext4_fsblk_t free_blocks, root_blocks; + + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + root_blocks = ext4_r_blocks_count(sbi->s_es); + if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { + return 0; + } + return 1; +} + +/** + * ext4_should_retry_alloc() + * @sb: super block + * @retries number of attemps has been made + * + * ext4_should_retry_alloc() is called when ENOSPC is returned, and if + * it is profitable to retry the operation, this function will wait + * for the current or commiting transaction to complete, and then + * return TRUE. + * + * if the total number of retries exceed three times, return FALSE. + */ +int ext4_should_retry_alloc(struct super_block *sb, int *retries) +{ + if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3) + return 0; + + jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); + + return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); +} + +/** + * ext4_new_blocks() -- core block(s) allocation function + * @handle: handle to this transaction + * @inode: file inode + * @goal: given target block(filesystem wide) + * @count: target number of blocks to allocate + * @errp: error code + * + * ext4_new_blocks uses a goal block to assist allocation. It tries to + * allocate block(s) from the block group contains the goal block first. If that + * fails, it will try to allocate block(s) from other block groups without + * any specific goal block. + * + */ +ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gdp_bh; + unsigned long group_no; + int goal_group; + ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ + ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ + ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ + int bgi; /* blockgroup iteration index */ + int fatal = 0, err; + int performed_allocation = 0; + ext4_grpblk_t free_blocks; /* number of free blocks in a group */ + struct super_block *sb; + struct ext4_group_desc *gdp; + struct ext4_super_block *es; + struct ext4_sb_info *sbi; + struct ext4_reserve_window_node *my_rsv = NULL; + struct ext4_block_alloc_info *block_i; + unsigned short windowsz = 0; +#ifdef EXT4FS_DEBUG + static int goal_hits, goal_attempts; +#endif + unsigned long ngroups; + unsigned long num = *count; + + *errp = -ENOSPC; + sb = inode->i_sb; + if (!sb) { + printk("ext4_new_block: nonexistent device"); + return 0; + } + + /* + * Check quota for allocation of this block. + */ + if (DQUOT_ALLOC_BLOCK(inode, num)) { + *errp = -EDQUOT; + return 0; + } + + sbi = EXT4_SB(sb); + es = EXT4_SB(sb)->s_es; + ext4_debug("goal=%lu.\n", goal); + /* + * Allocate a block from reservation only when + * filesystem is mounted with reservation(default,-o reservation), and + * it's a regular file, and + * the desired window size is greater than 0 (One could use ioctl + * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off + * reservation on that particular file) + */ + block_i = EXT4_I(inode)->i_block_alloc_info; + if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) + my_rsv = &block_i->rsv_window_node; + + if (!ext4_has_free_blocks(sbi)) { + *errp = -ENOSPC; + goto out; + } + + /* + * First, test whether the goal block is free. + */ + if (goal < le32_to_cpu(es->s_first_data_block) || + goal >= ext4_blocks_count(es)) + goal = le32_to_cpu(es->s_first_data_block); + ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); + goal_group = group_no; +retry_alloc: + gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); + if (!gdp) + goto io_error; + + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * if there is not enough free blocks to make a new resevation + * turn off reservation for this allocation + */ + if (my_rsv && (free_blocks < windowsz) + && (rsv_is_empty(&my_rsv->rsv_window))) + my_rsv = NULL; + + if (free_blocks > 0) { + bitmap_bh = read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, grp_target_blk, + my_rsv, &num, &fatal); + if (fatal) + goto out; + if (grp_alloc_blk >= 0) + goto allocated; + } + + ngroups = EXT4_SB(sb)->s_groups_count; + smp_rmb(); + + /* + * Now search the rest of the groups. We assume that + * i and gdp correctly point to the last group visited. + */ + for (bgi = 0; bgi < ngroups; bgi++) { + group_no++; + if (group_no >= ngroups) + group_no = 0; + gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); + if (!gdp) { + *errp = -EIO; + goto out; + } + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * skip this group if the number of + * free blocks is less than half of the reservation + * window size. + */ + if (free_blocks <= (windowsz/2)) + continue; + + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + /* + * try to allocate block(s) from this group, without a goal(-1). + */ + grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, -1, my_rsv, + &num, &fatal); + if (fatal) + goto out; + if (grp_alloc_blk >= 0) + goto allocated; + } + /* + * We may end up a bogus ealier ENOSPC error due to + * filesystem is "full" of reservations, but + * there maybe indeed free blocks avaliable on disk + * In this case, we just forget about the reservations + * just do block allocation as without reservations. + */ + if (my_rsv) { + my_rsv = NULL; + group_no = goal_group; + goto retry_alloc; + } + /* No space left on the device */ + *errp = -ENOSPC; + goto out; + +allocated: + + ext4_debug("using block group %d(%d)\n", + group_no, gdp->bg_free_blocks_count); + + BUFFER_TRACE(gdp_bh, "get_write_access"); + fatal = ext4_journal_get_write_access(handle, gdp_bh); + if (fatal) + goto out; + + ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); + + if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || + in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || + in_range(ret_block, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group) || + in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group)) + ext4_error(sb, "ext4_new_block", + "Allocating block in system zone - " + "blocks from %llu, length %lu", + ret_block, num); + + performed_allocation = 1; + +#ifdef CONFIG_JBD_DEBUG + { + struct buffer_head *debug_bh; + + /* Record bitmap buffer state in the newly allocated block */ + debug_bh = sb_find_get_block(sb, ret_block); + if (debug_bh) { + BUFFER_TRACE(debug_bh, "state when allocated"); + BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); + brelse(debug_bh); + } + } + jbd_lock_bh_state(bitmap_bh); + spin_lock(sb_bgl_lock(sbi, group_no)); + if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { + int i; + + for (i = 0; i < num; i++) { + if (ext4_test_bit(grp_alloc_blk+i, + bh2jh(bitmap_bh)->b_committed_data)) { + printk("%s: block was unexpectedly set in " + "b_committed_data\n", __FUNCTION__); + } + } + } + ext4_debug("found bit %d\n", grp_alloc_blk); + spin_unlock(sb_bgl_lock(sbi, group_no)); + jbd_unlock_bh_state(bitmap_bh); +#endif + + if (ret_block + num - 1 >= ext4_blocks_count(es)) { + ext4_error(sb, "ext4_new_block", + "block(%llu) >= blocks count(%llu) - " + "block_group = %lu, es == %p ", ret_block, + ext4_blocks_count(es), group_no, es); + goto out; + } + + /* + * It is up to the caller to add the new buffer to a journal + * list of some description. We don't know in advance whether + * the caller wants to use it as metadata or data. + */ + ext4_debug("allocating block %lu. Goal hits %d of %d.\n", + ret_block, goal_hits, goal_attempts); + + spin_lock(sb_bgl_lock(sbi, group_no)); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + spin_unlock(sb_bgl_lock(sbi, group_no)); + percpu_counter_mod(&sbi->s_freeblocks_counter, -num); + + BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); + err = ext4_journal_dirty_metadata(handle, gdp_bh); + if (!fatal) + fatal = err; + + sb->s_dirt = 1; + if (fatal) + goto out; + + *errp = 0; + brelse(bitmap_bh); + DQUOT_FREE_BLOCK(inode, *count-num); + *count = num; + return ret_block; + +io_error: + *errp = -EIO; +out: + if (fatal) { + *errp = fatal; + ext4_std_error(sb, fatal); + } + /* + * Undo the block allocation + */ + if (!performed_allocation) + DQUOT_FREE_BLOCK(inode, *count); + brelse(bitmap_bh); + return 0; +} + +ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp) +{ + unsigned long count = 1; + + return ext4_new_blocks(handle, inode, goal, &count, errp); +} + +/** + * ext4_count_free_blocks() -- count filesystem free blocks + * @sb: superblock + * + * Adds up the number of free blocks from each block group. + */ +ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) +{ + ext4_fsblk_t desc_count; + struct ext4_group_desc *gdp; + int i; + unsigned long ngroups = EXT4_SB(sb)->s_groups_count; +#ifdef EXT4FS_DEBUG + struct ext4_super_block *es; + ext4_fsblk_t bitmap_count; + unsigned long x; + struct buffer_head *bitmap_bh = NULL; + + es = EXT4_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + + smp_rmb(); + for (i = 0; i < ngroups; i++) { + gdp = ext4_get_group_desc(sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, i); + if (bitmap_bh == NULL) + continue; + + x = ext4_count_free(bitmap_bh, sb->s_blocksize); + printk("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(gdp->bg_free_blocks_count), x); + bitmap_count += x; + } + brelse(bitmap_bh); + printk("ext4_count_free_blocks: stored = %llu" + ", computed = %llu, %llu\n", + EXT4_FREE_BLOCKS_COUNT(es), + desc_count, bitmap_count); + return bitmap_count; +#else + desc_count = 0; + smp_rmb(); + for (i = 0; i < ngroups; i++) { + gdp = ext4_get_group_desc(sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + } + + return desc_count; +#endif +} + +static inline int +block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map) +{ + ext4_grpblk_t offset; + + ext4_get_group_no_and_offset(sb, block, NULL, &offset); + return ext4_test_bit (offset, map); +} + +static inline int test_root(int a, int b) +{ + int num = b; + + while (a > num) + num *= b; + return num == a; +} + +static int ext4_group_sparse(int group) +{ + if (group <= 1) + return 1; + if (!(group & 1)) + return 0; + return (test_root(group, 7) || test_root(group, 5) || + test_root(group, 3)); +} + +/** + * ext4_bg_has_super - number of blocks used by the superblock in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the superblock (primary or backup) + * in this group. Currently this will be only 0 or 1. + */ +int ext4_bg_has_super(struct super_block *sb, int group) +{ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext4_group_sparse(group)) + return 0; + return 1; +} + +static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group) +{ + unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); + unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb); + unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1; + + if (group == first || group == first + 1 || group == last) + return 1; + return 0; +} + +static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group) +{ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext4_group_sparse(group)) + return 0; + return EXT4_SB(sb)->s_gdb_count; +} + +/** + * ext4_bg_num_gdb - number of blocks used by the group table in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the group descriptor table + * (primary or backup) in this group. In the future there may be a + * different number of descriptor blocks in each group. + */ +unsigned long ext4_bg_num_gdb(struct super_block *sb, int group) +{ + unsigned long first_meta_bg = + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); + unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); + + if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || + metagroup < first_meta_bg) + return ext4_bg_num_gdb_nometa(sb,group); + + return ext4_bg_num_gdb_meta(sb,group); + +} diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c new file mode 100644 index 000000000000..11e93c169bcf --- /dev/null +++ b/fs/ext4/bitmap.c @@ -0,0 +1,32 @@ +/* + * linux/fs/ext4/bitmap.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include <linux/buffer_head.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> + +#ifdef EXT4FS_DEBUG + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} + +#endif /* EXT4FS_DEBUG */ + diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c new file mode 100644 index 000000000000..f8595787a70e --- /dev/null +++ b/fs/ext4/dir.c @@ -0,0 +1,518 @@ +/* + * linux/fs/ext4/dir.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext4 directory handling functions + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * Hash Tree Directory indexing (c) 2001 Daniel Phillips + * + */ + +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/buffer_head.h> +#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/rbtree.h> + +static unsigned char ext4_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +}; + +static int ext4_readdir(struct file *, void *, filldir_t); +static int ext4_dx_readdir(struct file * filp, + void * dirent, filldir_t filldir); +static int ext4_release_dir (struct inode * inode, + struct file * filp); + +const struct file_operations ext4_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = ext4_readdir, /* we take BKL. needed?*/ + .ioctl = ext4_ioctl, /* BKL held */ +#ifdef CONFIG_COMPAT + .compat_ioctl = ext4_compat_ioctl, +#endif + .fsync = ext4_sync_file, /* BKL held */ +#ifdef CONFIG_EXT4_INDEX + .release = ext4_release_dir, +#endif +}; + + +static unsigned char get_dtype(struct super_block *sb, int filetype) +{ + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || + (filetype >= EXT4_FT_MAX)) + return DT_UNKNOWN; + + return (ext4_filetype_table[filetype]); +} + + +int ext4_check_dir_entry (const char * function, struct inode * dir, + struct ext4_dir_entry_2 * de, + struct buffer_head * bh, + unsigned long offset) +{ + const char * error_msg = NULL; + const int rlen = le16_to_cpu(de->rec_len); + + if (rlen < EXT4_DIR_REC_LEN(1)) + error_msg = "rec_len is smaller than minimal"; + else if (rlen % 4 != 0) + error_msg = "rec_len % 4 != 0"; + else if (rlen < EXT4_DIR_REC_LEN(de->name_len)) + error_msg = "rec_len is too small for name_len"; + else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) + error_msg = "directory entry across blocks"; + else if (le32_to_cpu(de->inode) > + le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)) + error_msg = "inode out of bounds"; + + if (error_msg != NULL) + ext4_error (dir->i_sb, function, + "bad entry in directory #%lu: %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error_msg, offset, + (unsigned long) le32_to_cpu(de->inode), + rlen, de->name_len); + return error_msg == NULL ? 1 : 0; +} + +static int ext4_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int error = 0; + unsigned long offset; + int i, stored; + struct ext4_dir_entry_2 *de; + struct super_block *sb; + int err; + struct inode *inode = filp->f_dentry->d_inode; + int ret = 0; + + sb = inode->i_sb; + +#ifdef CONFIG_EXT4_INDEX + if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_COMPAT_DIR_INDEX) && + ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || + ((inode->i_size >> sb->s_blocksize_bits) == 1))) { + err = ext4_dx_readdir(filp, dirent, filldir); + if (err != ERR_BAD_DX_DIR) { + ret = err; + goto out; + } + /* + * We don't set the inode dirty flag since it's not + * critical that it get flushed back to the disk. + */ + EXT4_I(filp->f_dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; + } +#endif + stored = 0; + offset = filp->f_pos & (sb->s_blocksize - 1); + + while (!error && !stored && filp->f_pos < inode->i_size) { + unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); + struct buffer_head map_bh; + struct buffer_head *bh = NULL; + + map_bh.b_state = 0; + err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); + if (err > 0) { + page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, + &filp->f_ra, + filp, + map_bh.b_blocknr >> + (PAGE_CACHE_SHIFT - inode->i_blkbits), + 1); + bh = ext4_bread(NULL, inode, blk, 0, &err); + } + + /* + * We ignore I/O errors on directories so users have a chance + * of recovering data when there's a bad sector + */ + if (!bh) { + ext4_error (sb, "ext4_readdir", + "directory #%lu contains a hole at offset %lu", + inode->i_ino, (unsigned long)filp->f_pos); + filp->f_pos += sb->s_blocksize - offset; + continue; + } + +revalidate: + /* If the dir block has changed since the last call to + * readdir(2), then we might be pointing to an invalid + * dirent right now. Scan from the start of the block + * to make sure. */ + if (filp->f_version != inode->i_version) { + for (i = 0; i < sb->s_blocksize && i < offset; ) { + de = (struct ext4_dir_entry_2 *) + (bh->b_data + i); + /* It's too expensive to do a full + * dirent test each time round this + * loop, but we do have to test at + * least that it is non-zero. A + * failure will be detected in the + * dirent test below. */ + if (le16_to_cpu(de->rec_len) < + EXT4_DIR_REC_LEN(1)) + break; + i += le16_to_cpu(de->rec_len); + } + offset = i; + filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) + | offset; + filp->f_version = inode->i_version; + } + + while (!error && filp->f_pos < inode->i_size + && offset < sb->s_blocksize) { + de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); + if (!ext4_check_dir_entry ("ext4_readdir", inode, de, + bh, offset)) { + /* + * On error, skip the f_pos to the next block + */ + filp->f_pos = (filp->f_pos | + (sb->s_blocksize - 1)) + 1; + brelse (bh); + ret = stored; + goto out; + } + offset += le16_to_cpu(de->rec_len); + if (le32_to_cpu(de->inode)) { + /* We might block in the next section + * if the data destination is + * currently swapped out. So, use a + * version stamp to detect whether or + * not the directory has been modified + * during the copy operation. + */ + unsigned long version = filp->f_version; + + error = filldir(dirent, de->name, + de->name_len, + filp->f_pos, + le32_to_cpu(de->inode), + get_dtype(sb, de->file_type)); + if (error) + break; + if (version != filp->f_version) + goto revalidate; + stored ++; + } + filp->f_pos += le16_to_cpu(de->rec_len); + } + offset = 0; + brelse (bh); + } +out: + return ret; +} + +#ifdef CONFIG_EXT4_INDEX +/* + * These functions convert from the major/minor hash to an f_pos + * value. + * + * Currently we only use major hash numer. This is unfortunate, but + * on 32-bit machines, the same VFS interface is used for lseek and + * llseek, so if we use the 64 bit offset, then the 32-bit versions of + * lseek/telldir/seekdir will blow out spectacularly, and from within + * the ext2 low-level routine, we don't know if we're being called by + * a 64-bit version of the system call or the 32-bit version of the + * system call. Worse yet, NFSv2 only allows for a 32-bit readdir + * cookie. Sigh. + */ +#define hash2pos(major, minor) (major >> 1) +#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) +#define pos2min_hash(pos) (0) + +/* + * This structure holds the nodes of the red-black tree used to store + * the directory entry in hash order. + */ +struct fname { + __u32 hash; + __u32 minor_hash; + struct rb_node rb_hash; + struct fname *next; + __u32 inode; + __u8 name_len; + __u8 file_type; + char name[0]; +}; + +/* + * This functoin implements a non-recursive way of freeing all of the + * nodes in the red-black tree. + */ +static void free_rb_tree_fname(struct rb_root *root) +{ + struct rb_node *n = root->rb_node; + struct rb_node *parent; + struct fname *fname; + + while (n) { + /* Do the node's children first */ + if ((n)->rb_left) { + n = n->rb_left; + continue; + } + if (n->rb_right) { + n = n->rb_right; + continue; + } + /* + * The node has no children; free it, and then zero + * out parent's link to it. Finally go to the + * beginning of the loop and try to free the parent + * node. + */ + parent = rb_parent(n); + fname = rb_entry(n, struct fname, rb_hash); + while (fname) { + struct fname * old = fname; + fname = fname->next; + kfree (old); + } + if (!parent) + root->rb_node = NULL; + else if (parent->rb_left == n) + parent->rb_left = NULL; + else if (parent->rb_right == n) + parent->rb_right = NULL; + n = parent; + } + root->rb_node = NULL; +} + + +static struct dir_private_info *create_dir_info(loff_t pos) +{ + struct dir_private_info *p; + + p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); + if (!p) + return NULL; + p->root.rb_node = NULL; + p->curr_node = NULL; + p->extra_fname = NULL; + p->last_pos = 0; + p->curr_hash = pos2maj_hash(pos); + p->curr_minor_hash = pos2min_hash(pos); + p->next_hash = 0; + return p; +} + +void ext4_htree_free_dir_info(struct dir_private_info *p) +{ + free_rb_tree_fname(&p->root); + kfree(p); +} + +/* + * Given a directory entry, enter it into the fname rb tree. + */ +int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, + __u32 minor_hash, + struct ext4_dir_entry_2 *dirent) +{ + struct rb_node **p, *parent = NULL; + struct fname * fname, *new_fn; + struct dir_private_info *info; + int len; + + info = (struct dir_private_info *) dir_file->private_data; + p = &info->root.rb_node; + + /* Create and allocate the fname structure */ + len = sizeof(struct fname) + dirent->name_len + 1; + new_fn = kzalloc(len, GFP_KERNEL); + if (!new_fn) + return -ENOMEM; + new_fn->hash = hash; + new_fn->minor_hash = minor_hash; + new_fn->inode = le32_to_cpu(dirent->inode); + new_fn->name_len = dirent->name_len; + new_fn->file_type = dirent->file_type; + memcpy(new_fn->name, dirent->name, dirent->name_len); + new_fn->name[dirent->name_len] = 0; + + while (*p) { + parent = *p; + fname = rb_entry(parent, struct fname, rb_hash); + + /* + * If the hash and minor hash match up, then we put + * them on a linked list. This rarely happens... + */ + if ((new_fn->hash == fname->hash) && + (new_fn->minor_hash == fname->minor_hash)) { + new_fn->next = fname->next; + fname->next = new_fn; + return 0; + } + + if (new_fn->hash < fname->hash) + p = &(*p)->rb_left; + else if (new_fn->hash > fname->hash) + p = &(*p)->rb_right; + else if (new_fn->minor_hash < fname->minor_hash) + p = &(*p)->rb_left; + else /* if (new_fn->minor_hash > fname->minor_hash) */ + p = &(*p)->rb_right; + } + + rb_link_node(&new_fn->rb_hash, parent, p); + rb_insert_color(&new_fn->rb_hash, &info->root); + return 0; +} + + + +/* + * This is a helper function for ext4_dx_readdir. It calls filldir + * for all entres on the fname linked list. (Normally there is only + * one entry on the linked list, unless there are 62 bit hash collisions.) + */ +static int call_filldir(struct file * filp, void * dirent, + filldir_t filldir, struct fname *fname) +{ + struct dir_private_info *info = filp->private_data; + loff_t curr_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct super_block * sb; + int error; + + sb = inode->i_sb; + + if (!fname) { + printk("call_filldir: called with null fname?!?\n"); + return 0; + } + curr_pos = hash2pos(fname->hash, fname->minor_hash); + while (fname) { + error = filldir(dirent, fname->name, + fname->name_len, curr_pos, + fname->inode, + get_dtype(sb, fname->file_type)); + if (error) { + filp->f_pos = curr_pos; + info->extra_fname = fname->next; + return error; + } + fname = fname->next; + } + return 0; +} + +static int ext4_dx_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + struct dir_private_info *info = filp->private_data; + struct inode *inode = filp->f_dentry->d_inode; + struct fname *fname; + int ret; + + if (!info) { + info = create_dir_info(filp->f_pos); + if (!info) + return -ENOMEM; + filp->private_data = info; + } + + if (filp->f_pos == EXT4_HTREE_EOF) + return 0; /* EOF */ + + /* Some one has messed with f_pos; reset the world */ + if (info->last_pos != filp->f_pos) { + free_rb_tree_fname(&info->root); + info->curr_node = NULL; + info->extra_fname = NULL; + info->curr_hash = pos2maj_hash(filp->f_pos); + info->curr_minor_hash = pos2min_hash(filp->f_pos); + } + + /* + * If there are any leftover names on the hash collision + * chain, return them first. + */ + if (info->extra_fname && + call_filldir(filp, dirent, filldir, info->extra_fname)) + goto finished; + + if (!info->curr_node) + info->curr_node = rb_first(&info->root); + + while (1) { + /* + * Fill the rbtree if we have no more entries, + * or the inode has changed since we last read in the + * cached entries. + */ + if ((!info->curr_node) || + (filp->f_version != inode->i_version)) { + info->curr_node = NULL; + free_rb_tree_fname(&info->root); + filp->f_version = inode->i_version; + ret = ext4_htree_fill_tree(filp, info->curr_hash, + info->curr_minor_hash, + &info->next_hash); + if (ret < 0) + return ret; + if (ret == 0) { + filp->f_pos = EXT4_HTREE_EOF; + break; + } + info->curr_node = rb_first(&info->root); + } + + fname = rb_entry(info->curr_node, struct fname, rb_hash); + info->curr_hash = fname->hash; + info->curr_minor_hash = fname->minor_hash; + if (call_filldir(filp, dirent, filldir, fname)) + break; + + info->curr_node = rb_next(info->curr_node); + if (!info->curr_node) { + if (info->next_hash == ~0) { + filp->f_pos = EXT4_HTREE_EOF; + break; + } + info->curr_hash = info->next_hash; + info->curr_minor_hash = 0; + } + } +finished: + info->last_pos = filp->f_pos; + return 0; +} + +static int ext4_release_dir (struct inode * inode, struct file * filp) +{ + if (filp->private_data) + ext4_htree_free_dir_info(filp->private_data); + + return 0; +} + +#endif diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c new file mode 100644 index 000000000000..2608dce18f3e --- /dev/null +++ b/fs/ext4/extents.c @@ -0,0 +1,2152 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas <alex@clusterfs.com> + * + * Architecture independence: + * Copyright (c) 2005, Bull S.A. + * Written by Pierre Peiffer <pierre.peiffer@bull.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +/* + * Extents support for EXT4 + * + * TODO: + * - ext4*_error() should be used in some situations + * - analyze all BUG()/BUG_ON(), use -EIO where appropriate + * - smart tree reduction + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/ext4_jbd2.h> +#include <linux/jbd.h> +#include <linux/smp_lock.h> +#include <linux/highuid.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/ext4_fs_extents.h> +#include <asm/uaccess.h> + + +/* + * ext_pblock: + * combine low and high parts of physical block number into ext4_fsblk_t + */ +static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex) +{ + ext4_fsblk_t block; + + block = le32_to_cpu(ex->ee_start); + block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; + return block; +} + +/* + * idx_pblock: + * combine low and high parts of a leaf physical block number into ext4_fsblk_t + */ +static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) +{ + ext4_fsblk_t block; + + block = le32_to_cpu(ix->ei_leaf); + block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; + return block; +} + +/* + * ext4_ext_store_pblock: + * stores a large physical block number into an extent struct, + * breaking it into parts + */ +static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) +{ + ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); +} + +/* + * ext4_idx_store_pblock: + * stores a large physical block number into an index struct, + * breaking it into parts + */ +static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) +{ + ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); +} + +static int ext4_ext_check_header(const char *function, struct inode *inode, + struct ext4_extent_header *eh) +{ + const char *error_msg = NULL; + + if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { + error_msg = "invalid magic"; + goto corrupted; + } + if (unlikely(eh->eh_max == 0)) { + error_msg = "invalid eh_max"; + goto corrupted; + } + if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { + error_msg = "invalid eh_entries"; + goto corrupted; + } + return 0; + +corrupted: + ext4_error(inode->i_sb, function, + "bad header in inode #%lu: %s - magic %x, " + "entries %u, max %u, depth %u", + inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), + le16_to_cpu(eh->eh_depth)); + + return -EIO; +} + +static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) +{ + int err; + + if (handle->h_buffer_credits > needed) + return handle; + if (!ext4_journal_extend(handle, needed)) + return handle; + err = ext4_journal_restart(handle, needed); + + return handle; +} + +/* + * could return: + * - EROFS + * - ENOMEM + */ +static int ext4_ext_get_access(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + if (path->p_bh) { + /* path points to block */ + return ext4_journal_get_write_access(handle, path->p_bh); + } + /* path points to leaf/index in inode body */ + /* we use in-core data, no need to protect them */ + return 0; +} + +/* + * could return: + * - EROFS + * - ENOMEM + * - EIO + */ +static int ext4_ext_dirty(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + int err; + if (path->p_bh) { + /* path points to block */ + err = ext4_journal_dirty_metadata(handle, path->p_bh); + } else { + /* path points to leaf/index in inode body */ + err = ext4_mark_inode_dirty(handle, inode); + } + return err; +} + +static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, + struct ext4_ext_path *path, + ext4_fsblk_t block) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + ext4_fsblk_t bg_start; + ext4_grpblk_t colour; + int depth; + + if (path) { + struct ext4_extent *ex; + depth = path->p_depth; + + /* try to predict block placement */ + if ((ex = path[depth].p_ext)) + return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); + + /* it looks like index is empty; + * try to find starting block from index itself */ + if (path[depth].p_bh) + return path[depth].p_bh->b_blocknr; + } + + /* OK. use inode's group */ + bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + return bg_start + colour + block; +} + +static ext4_fsblk_t +ext4_ext_new_block(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex, int *err) +{ + ext4_fsblk_t goal, newblock; + + goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); + newblock = ext4_new_block(handle, inode, goal, err); + return newblock; +} + +static inline int ext4_ext_space_block(struct inode *inode) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent); +#ifdef AGRESSIVE_TEST + if (size > 6) + size = 6; +#endif + return size; +} + +static inline int ext4_ext_space_block_idx(struct inode *inode) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx); +#ifdef AGRESSIVE_TEST + if (size > 5) + size = 5; +#endif + return size; +} + +static inline int ext4_ext_space_root(struct inode *inode) +{ + int size; + + size = sizeof(EXT4_I(inode)->i_data); + size -= sizeof(struct ext4_extent_header); + size /= sizeof(struct ext4_extent); +#ifdef AGRESSIVE_TEST + if (size > 3) + size = 3; +#endif + return size; +} + +static inline int ext4_ext_space_root_idx(struct inode *inode) +{ + int size; + + size = sizeof(EXT4_I(inode)->i_data); + size -= sizeof(struct ext4_extent_header); + size /= sizeof(struct ext4_extent_idx); +#ifdef AGRESSIVE_TEST + if (size > 4) + size = 4; +#endif + return size; +} + +#ifdef EXT_DEBUG +static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) +{ + int k, l = path->p_depth; + + ext_debug("path:"); + for (k = 0; k <= l; k++, path++) { + if (path->p_idx) { + ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), + idx_pblock(path->p_idx)); + } else if (path->p_ext) { + ext_debug(" %d:%d:%llu ", + le32_to_cpu(path->p_ext->ee_block), + le16_to_cpu(path->p_ext->ee_len), + ext_pblock(path->p_ext)); + } else + ext_debug(" []"); + } + ext_debug("\n"); +} + +static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) +{ + int depth = ext_depth(inode); + struct ext4_extent_header *eh; + struct ext4_extent *ex; + int i; + + if (!path) + return; + + eh = path[depth].p_hdr; + ex = EXT_FIRST_EXTENT(eh); + + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { + ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), + le16_to_cpu(ex->ee_len), ext_pblock(ex)); + } + ext_debug("\n"); +} +#else +#define ext4_ext_show_path(inode,path) +#define ext4_ext_show_leaf(inode,path) +#endif + +static void ext4_ext_drop_refs(struct ext4_ext_path *path) +{ + int depth = path->p_depth; + int i; + + for (i = 0; i <= depth; i++, path++) + if (path->p_bh) { + brelse(path->p_bh); + path->p_bh = NULL; + } +} + +/* + * ext4_ext_binsearch_idx: + * binary search for the closest index of the given block + */ +static void +ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) +{ + struct ext4_extent_header *eh = path->p_hdr; + struct ext4_extent_idx *r, *l, *m; + + BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + BUG_ON(le16_to_cpu(eh->eh_entries) <= 0); + + ext_debug("binsearch for %d(idx): ", block); + + l = EXT_FIRST_INDEX(eh) + 1; + r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (block < le32_to_cpu(m->ei_block)) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block, + m, m->ei_block, r, r->ei_block); + } + + path->p_idx = l - 1; + ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), + idx_block(path->p_idx)); + +#ifdef CHECK_BINSEARCH + { + struct ext4_extent_idx *chix, *ix; + int k; + + chix = ix = EXT_FIRST_INDEX(eh); + for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { + if (k != 0 && + le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { + printk("k=%d, ix=0x%p, first=0x%p\n", k, + ix, EXT_FIRST_INDEX(eh)); + printk("%u <= %u\n", + le32_to_cpu(ix->ei_block), + le32_to_cpu(ix[-1].ei_block)); + } + BUG_ON(k && le32_to_cpu(ix->ei_block) + <= le32_to_cpu(ix[-1].ei_block)); + if (block < le32_to_cpu(ix->ei_block)) + break; + chix = ix; + } + BUG_ON(chix != path->p_idx); + } +#endif + +} + +/* + * ext4_ext_binsearch: + * binary search for closest extent of the given block + */ +static void +ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) +{ + struct ext4_extent_header *eh = path->p_hdr; + struct ext4_extent *r, *l, *m; + + BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + + if (eh->eh_entries == 0) { + /* + * this leaf is empty: + * we get such a leaf in split/add case + */ + return; + } + + ext_debug("binsearch for %d: ", block); + + l = EXT_FIRST_EXTENT(eh) + 1; + r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1; + + while (l <= r) { + m = l + (r - l) / 2; + if (block < le32_to_cpu(m->ee_block)) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, + m, m->ee_block, r, r->ee_block); + } + + path->p_ext = l - 1; + ext_debug(" -> %d:%llu:%d ", + le32_to_cpu(path->p_ext->ee_block), + ext_pblock(path->p_ext), + le16_to_cpu(path->p_ext->ee_len)); + +#ifdef CHECK_BINSEARCH + { + struct ext4_extent *chex, *ex; + int k; + + chex = ex = EXT_FIRST_EXTENT(eh); + for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { + BUG_ON(k && le32_to_cpu(ex->ee_block) + <= le32_to_cpu(ex[-1].ee_block)); + if (block < le32_to_cpu(ex->ee_block)) + break; + chex = ex; + } + BUG_ON(chex != path->p_ext); + } +#endif + +} + +int ext4_ext_tree_init(handle_t *handle, struct inode *inode) +{ + struct ext4_extent_header *eh; + + eh = ext_inode_hdr(inode); + eh->eh_depth = 0; + eh->eh_entries = 0; + eh->eh_magic = EXT4_EXT_MAGIC; + eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); + ext4_mark_inode_dirty(handle, inode); + ext4_ext_invalidate_cache(inode); + return 0; +} + +struct ext4_ext_path * +ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) +{ + struct ext4_extent_header *eh; + struct buffer_head *bh; + short int depth, i, ppos = 0, alloc = 0; + + eh = ext_inode_hdr(inode); + BUG_ON(eh == NULL); + if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + return ERR_PTR(-EIO); + + i = depth = ext_depth(inode); + + /* account possible depth increase */ + if (!path) { + path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2), + GFP_NOFS); + if (!path) + return ERR_PTR(-ENOMEM); + alloc = 1; + } + memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); + path[0].p_hdr = eh; + + /* walk through the tree */ + while (i) { + ext_debug("depth %d: num %d, max %d\n", + ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + ext4_ext_binsearch_idx(inode, path + ppos, block); + path[ppos].p_block = idx_pblock(path[ppos].p_idx); + path[ppos].p_depth = i; + path[ppos].p_ext = NULL; + + bh = sb_bread(inode->i_sb, path[ppos].p_block); + if (!bh) + goto err; + + eh = ext_block_hdr(bh); + ppos++; + BUG_ON(ppos > depth); + path[ppos].p_bh = bh; + path[ppos].p_hdr = eh; + i--; + + if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + goto err; + } + + path[ppos].p_depth = i; + path[ppos].p_hdr = eh; + path[ppos].p_ext = NULL; + path[ppos].p_idx = NULL; + + if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + goto err; + + /* find extent */ + ext4_ext_binsearch(inode, path + ppos, block); + + ext4_ext_show_path(inode, path); + + return path; + +err: + ext4_ext_drop_refs(path); + if (alloc) + kfree(path); + return ERR_PTR(-EIO); +} + +/* + * ext4_ext_insert_index: + * insert new index [@logical;@ptr] into the block at @curp; + * check where to insert: before @curp or after @curp + */ +static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, + struct ext4_ext_path *curp, + int logical, ext4_fsblk_t ptr) +{ + struct ext4_extent_idx *ix; + int len, err; + + if ((err = ext4_ext_get_access(handle, inode, curp))) + return err; + + BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); + len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; + if (logical > le32_to_cpu(curp->p_idx->ei_block)) { + /* insert after */ + if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { + len = (len - 1) * sizeof(struct ext4_extent_idx); + len = len < 0 ? 0 : len; + ext_debug("insert new index %d after: %d. " + "move %d from 0x%p to 0x%p\n", + logical, ptr, len, + (curp->p_idx + 1), (curp->p_idx + 2)); + memmove(curp->p_idx + 2, curp->p_idx + 1, len); + } + ix = curp->p_idx + 1; + } else { + /* insert before */ + len = len * sizeof(struct ext4_extent_idx); + len = len < 0 ? 0 : len; + ext_debug("insert new index %d before: %d. " + "move %d from 0x%p to 0x%p\n", + logical, ptr, len, + curp->p_idx, (curp->p_idx + 1)); + memmove(curp->p_idx + 1, curp->p_idx, len); + ix = curp->p_idx; + } + + ix->ei_block = cpu_to_le32(logical); + ext4_idx_store_pblock(ix, ptr); + curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); + + BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) + > le16_to_cpu(curp->p_hdr->eh_max)); + BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); + + err = ext4_ext_dirty(handle, inode, curp); + ext4_std_error(inode->i_sb, err); + + return err; +} + +/* + * ext4_ext_split: + * inserts new subtree into the path, using free index entry + * at depth @at: + * - allocates all needed blocks (new leaf and all intermediate index blocks) + * - makes decision where to split + * - moves remaining extents and index entries (right to the split point) + * into the newly allocated blocks + * - initializes subtree + */ +static int ext4_ext_split(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int at) +{ + struct buffer_head *bh = NULL; + int depth = ext_depth(inode); + struct ext4_extent_header *neh; + struct ext4_extent_idx *fidx; + struct ext4_extent *ex; + int i = at, k, m, a; + ext4_fsblk_t newblock, oldblock; + __le32 border; + ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ + int err = 0; + + /* make decision: where to split? */ + /* FIXME: now decision is simplest: at current extent */ + + /* if current leaf will be split, then we should use + * border from split point */ + BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); + if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { + border = path[depth].p_ext[1].ee_block; + ext_debug("leaf will be split." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } else { + border = newext->ee_block; + ext_debug("leaf will be added." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } + + /* + * If error occurs, then we break processing + * and mark filesystem read-only. index won't + * be inserted and tree will be in consistent + * state. Next mount will repair buffers too. + */ + + /* + * Get array to track all allocated blocks. + * We need this to handle errors and free blocks + * upon them. + */ + ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); + if (!ablocks) + return -ENOMEM; + memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth); + + /* allocate all needed blocks */ + ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); + for (a = 0; a < depth - at; a++) { + newblock = ext4_ext_new_block(handle, inode, path, newext, &err); + if (newblock == 0) + goto cleanup; + ablocks[a] = newblock; + } + + /* initialize new leaf */ + newblock = ablocks[--a]; + BUG_ON(newblock == 0); + bh = sb_getblk(inode->i_sb, newblock); + if (!bh) { + err = -EIO; + goto cleanup; + } + lock_buffer(bh); + + if ((err = ext4_journal_get_create_access(handle, bh))) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = 0; + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); + neh->eh_magic = EXT4_EXT_MAGIC; + neh->eh_depth = 0; + ex = EXT_FIRST_EXTENT(neh); + + /* move remainder of path[depth] to the new leaf */ + BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); + /* start copy from next extent */ + /* TODO: we could do it by single memmove */ + m = 0; + path[depth].p_ext++; + while (path[depth].p_ext <= + EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug("move %d:%llu:%d in new leaf %llu\n", + le32_to_cpu(path[depth].p_ext->ee_block), + ext_pblock(path[depth].p_ext), + le16_to_cpu(path[depth].p_ext->ee_len), + newblock); + /*memmove(ex++, path[depth].p_ext++, + sizeof(struct ext4_extent)); + neh->eh_entries++;*/ + path[depth].p_ext++; + m++; + } + if (m) { + memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); + neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); + } + + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext4_journal_dirty_metadata(handle, bh))) + goto cleanup; + brelse(bh); + bh = NULL; + + /* correct old leaf */ + if (m) { + if ((err = ext4_ext_get_access(handle, inode, path + depth))) + goto cleanup; + path[depth].p_hdr->eh_entries = + cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); + if ((err = ext4_ext_dirty(handle, inode, path + depth))) + goto cleanup; + + } + + /* create intermediate indexes */ + k = depth - at - 1; + BUG_ON(k < 0); + if (k) + ext_debug("create %d intermediate indices\n", k); + /* insert new index into current index block */ + /* current depth stored in i var */ + i = depth - 1; + while (k--) { + oldblock = newblock; + newblock = ablocks[--a]; + bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock); + if (!bh) { + err = -EIO; + goto cleanup; + } + lock_buffer(bh); + + if ((err = ext4_journal_get_create_access(handle, bh))) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = cpu_to_le16(1); + neh->eh_magic = EXT4_EXT_MAGIC; + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); + neh->eh_depth = cpu_to_le16(depth - i); + fidx = EXT_FIRST_INDEX(neh); + fidx->ei_block = border; + ext4_idx_store_pblock(fidx, oldblock); + + ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i, + newblock, (unsigned long) le32_to_cpu(border), + oldblock); + /* copy indexes */ + m = 0; + path[i].p_idx++; + + ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != + EXT_LAST_INDEX(path[i].p_hdr)); + while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { + ext_debug("%d: move %d:%d in new index %llu\n", i, + le32_to_cpu(path[i].p_idx->ei_block), + idx_pblock(path[i].p_idx), + newblock); + /*memmove(++fidx, path[i].p_idx++, + sizeof(struct ext4_extent_idx)); + neh->eh_entries++; + BUG_ON(neh->eh_entries > neh->eh_max);*/ + path[i].p_idx++; + m++; + } + if (m) { + memmove(++fidx, path[i].p_idx - m, + sizeof(struct ext4_extent_idx) * m); + neh->eh_entries = + cpu_to_le16(le16_to_cpu(neh->eh_entries) + m); + } + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext4_journal_dirty_metadata(handle, bh))) + goto cleanup; + brelse(bh); + bh = NULL; + + /* correct old index */ + if (m) { + err = ext4_ext_get_access(handle, inode, path + i); + if (err) + goto cleanup; + path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); + err = ext4_ext_dirty(handle, inode, path + i); + if (err) + goto cleanup; + } + + i--; + } + + /* insert new index */ + if (err) + goto cleanup; + + err = ext4_ext_insert_index(handle, inode, path + at, + le32_to_cpu(border), newblock); + +cleanup: + if (bh) { + if (buffer_locked(bh)) + unlock_buffer(bh); + brelse(bh); + } + + if (err) { + /* free all allocated blocks in error case */ + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; + ext4_free_blocks(handle, inode, ablocks[i], 1); + } + } + kfree(ablocks); + + return err; +} + +/* + * ext4_ext_grow_indepth: + * implements tree growing procedure: + * - allocates new block + * - moves top-level data (index block or leaf) into the new block + * - initializes new top-level, creating index that points to the + * just created block + */ +static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext) +{ + struct ext4_ext_path *curp = path; + struct ext4_extent_header *neh; + struct ext4_extent_idx *fidx; + struct buffer_head *bh; + ext4_fsblk_t newblock; + int err = 0; + + newblock = ext4_ext_new_block(handle, inode, path, newext, &err); + if (newblock == 0) + return err; + + bh = sb_getblk(inode->i_sb, newblock); + if (!bh) { + err = -EIO; + ext4_std_error(inode->i_sb, err); + return err; + } + lock_buffer(bh); + + if ((err = ext4_journal_get_create_access(handle, bh))) { + unlock_buffer(bh); + goto out; + } + + /* move top-level index/leaf into new block */ + memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); + + /* set size of new block */ + neh = ext_block_hdr(bh); + /* old root could have indexes or leaves + * so calculate e_max right way */ + if (ext_depth(inode)) + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); + else + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); + neh->eh_magic = EXT4_EXT_MAGIC; + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext4_journal_dirty_metadata(handle, bh))) + goto out; + + /* create index in new top-level index: num,max,pointer */ + if ((err = ext4_ext_get_access(handle, inode, curp))) + goto out; + + curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; + curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); + curp->p_hdr->eh_entries = cpu_to_le16(1); + curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); + /* FIXME: it works, but actually path[0] can be index */ + curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; + ext4_idx_store_pblock(curp->p_idx, newblock); + + neh = ext_inode_hdr(inode); + fidx = EXT_FIRST_INDEX(neh); + ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", + le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), + le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); + + neh->eh_depth = cpu_to_le16(path->p_depth + 1); + err = ext4_ext_dirty(handle, inode, curp); +out: + brelse(bh); + + return err; +} + +/* + * ext4_ext_create_new_leaf: + * finds empty index and adds new leaf. + * if no free index is found, then it requests in-depth growing. + */ +static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext) +{ + struct ext4_ext_path *curp; + int depth, i, err = 0; + +repeat: + i = depth = ext_depth(inode); + + /* walk up to the tree and look for free index entry */ + curp = path + depth; + while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { + i--; + curp--; + } + + /* we use already allocated block for index block, + * so subsequent data blocks should be contiguous */ + if (EXT_HAS_FREE_INDEX(curp)) { + /* if we found index with free entry, then use that + * entry: create all needed subtree and add new leaf */ + err = ext4_ext_split(handle, inode, path, newext, i); + + /* refill path */ + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, + le32_to_cpu(newext->ee_block), + path); + if (IS_ERR(path)) + err = PTR_ERR(path); + } else { + /* tree is full, time to grow in depth */ + err = ext4_ext_grow_indepth(handle, inode, path, newext); + if (err) + goto out; + + /* refill path */ + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, + le32_to_cpu(newext->ee_block), + path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; + } + + /* + * only first (depth 0 -> 1) produces free space; + * in all other cases we have to split the grown tree + */ + depth = ext_depth(inode); + if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { + /* now we need to split */ + goto repeat; + } + } + +out: + return err; +} + +/* + * ext4_ext_next_allocated_block: + * returns allocated block in subsequent extent or EXT_MAX_BLOCK. + * NOTE: it considers block number from index entry as + * allocated block. Thus, index entries have to be consistent + * with leaves. + */ +static unsigned long +ext4_ext_next_allocated_block(struct ext4_ext_path *path) +{ + int depth; + + BUG_ON(path == NULL); + depth = path->p_depth; + + if (depth == 0 && path->p_ext == NULL) + return EXT_MAX_BLOCK; + + while (depth >= 0) { + if (depth == path->p_depth) { + /* leaf */ + if (path[depth].p_ext != + EXT_LAST_EXTENT(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_ext[1].ee_block); + } else { + /* index */ + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_idx[1].ei_block); + } + depth--; + } + + return EXT_MAX_BLOCK; +} + +/* + * ext4_ext_next_leaf_block: + * returns first allocated block from next leaf or EXT_MAX_BLOCK + */ +static unsigned ext4_ext_next_leaf_block(struct inode *inode, + struct ext4_ext_path *path) +{ + int depth; + + BUG_ON(path == NULL); + depth = path->p_depth; + + /* zero-tree has no leaf blocks at all */ + if (depth == 0) + return EXT_MAX_BLOCK; + + /* go to index block */ + depth--; + + while (depth >= 0) { + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_idx[1].ei_block); + depth--; + } + + return EXT_MAX_BLOCK; +} + +/* + * ext4_ext_correct_indexes: + * if leaf gets modified and modified extent is first in the leaf, + * then we have to correct all indexes above. + * TODO: do we need to correct tree in all cases? + */ +int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + struct ext4_extent_header *eh; + int depth = ext_depth(inode); + struct ext4_extent *ex; + __le32 border; + int k, err = 0; + + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + BUG_ON(ex == NULL); + BUG_ON(eh == NULL); + + if (depth == 0) { + /* there is no tree at all */ + return 0; + } + + if (ex != EXT_FIRST_EXTENT(eh)) { + /* we correct tree if first leaf got modified only */ + return 0; + } + + /* + * TODO: we need correction if border is smaller than current one + */ + k = depth - 1; + border = path[depth].p_ext->ee_block; + if ((err = ext4_ext_get_access(handle, inode, path + k))) + return err; + path[k].p_idx->ei_block = border; + if ((err = ext4_ext_dirty(handle, inode, path + k))) + return err; + + while (k--) { + /* change all left-side indexes */ + if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) + break; + if ((err = ext4_ext_get_access(handle, inode, path + k))) + break; + path[k].p_idx->ei_block = border; + if ((err = ext4_ext_dirty(handle, inode, path + k))) + break; + } + + return err; +} + +static int inline +ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, + struct ext4_extent *ex2) +{ + if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) != + le32_to_cpu(ex2->ee_block)) + return 0; + + /* + * To allow future support for preallocated extents to be added + * as an RO_COMPAT feature, refuse to merge to extents if + * this can result in the top bit of ee_len being set. + */ + if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) + return 0; +#ifdef AGRESSIVE_TEST + if (le16_to_cpu(ex1->ee_len) >= 4) + return 0; +#endif + + if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) + return 1; + return 0; +} + +/* + * ext4_ext_insert_extent: + * tries to merge requsted extent into the existing extent or + * inserts requested extent as new one into the tree, + * creating new leaf in the no-space case. + */ +int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext) +{ + struct ext4_extent_header * eh; + struct ext4_extent *ex, *fex; + struct ext4_extent *nearex; /* nearest extent */ + struct ext4_ext_path *npath = NULL; + int depth, len, err, next; + + BUG_ON(newext->ee_len == 0); + depth = ext_depth(inode); + ex = path[depth].p_ext; + BUG_ON(path[depth].p_hdr == NULL); + + /* try to insert block into found extent and return */ + if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { + ext_debug("append %d block to %d:%d (from %llu)\n", + le16_to_cpu(newext->ee_len), + le32_to_cpu(ex->ee_block), + le16_to_cpu(ex->ee_len), ext_pblock(ex)); + if ((err = ext4_ext_get_access(handle, inode, path + depth))) + return err; + ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) + + le16_to_cpu(newext->ee_len)); + eh = path[depth].p_hdr; + nearex = ex; + goto merge; + } + +repeat: + depth = ext_depth(inode); + eh = path[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) + goto has_space; + + /* probably next leaf has space for us? */ + fex = EXT_LAST_EXTENT(eh); + next = ext4_ext_next_leaf_block(inode, path); + if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) + && next != EXT_MAX_BLOCK) { + ext_debug("next leaf block - %d\n", next); + BUG_ON(npath != NULL); + npath = ext4_ext_find_extent(inode, next, NULL); + if (IS_ERR(npath)) + return PTR_ERR(npath); + BUG_ON(npath->p_depth != path->p_depth); + eh = npath[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { + ext_debug("next leaf isnt full(%d)\n", + le16_to_cpu(eh->eh_entries)); + path = npath; + goto repeat; + } + ext_debug("next leaf has no free space(%d,%d)\n", + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + } + + /* + * There is no free space in the found leaf. + * We're gonna add a new leaf in the tree. + */ + err = ext4_ext_create_new_leaf(handle, inode, path, newext); + if (err) + goto cleanup; + depth = ext_depth(inode); + eh = path[depth].p_hdr; + +has_space: + nearex = path[depth].p_ext; + + if ((err = ext4_ext_get_access(handle, inode, path + depth))) + goto cleanup; + + if (!nearex) { + /* there is no extent in this leaf, create first one */ + ext_debug("first extent in the leaf: %d:%llu:%d\n", + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len)); + path[depth].p_ext = EXT_FIRST_EXTENT(eh); + } else if (le32_to_cpu(newext->ee_block) + > le32_to_cpu(nearex->ee_block)) { +/* BUG_ON(newext->ee_block == nearex->ee_block); */ + if (nearex != EXT_LAST_EXTENT(eh)) { + len = EXT_MAX_EXTENT(eh) - nearex; + len = (len - 1) * sizeof(struct ext4_extent); + len = len < 0 ? 0 : len; + ext_debug("insert %d:%llu:%d after: nearest 0x%p, " + "move %d from 0x%p to 0x%p\n", + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len), + nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 2, nearex + 1, len); + } + path[depth].p_ext = nearex + 1; + } else { + BUG_ON(newext->ee_block == nearex->ee_block); + len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); + len = len < 0 ? 0 : len; + ext_debug("insert %d:%llu:%d before: nearest 0x%p, " + "move %d from 0x%p to 0x%p\n", + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len), + nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 1, nearex, len); + path[depth].p_ext = nearex; + } + + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); + nearex = path[depth].p_ext; + nearex->ee_block = newext->ee_block; + nearex->ee_start = newext->ee_start; + nearex->ee_start_hi = newext->ee_start_hi; + nearex->ee_len = newext->ee_len; + +merge: + /* try to merge extents to the right */ + while (nearex < EXT_LAST_EXTENT(eh)) { + if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) + break; + /* merge with next extent! */ + nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) + + le16_to_cpu(nearex[1].ee_len)); + if (nearex + 1 < EXT_LAST_EXTENT(eh)) { + len = (EXT_LAST_EXTENT(eh) - nearex - 1) + * sizeof(struct ext4_extent); + memmove(nearex + 1, nearex + 2, len); + } + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + BUG_ON(eh->eh_entries == 0); + } + + /* try to merge extents to the left */ + + /* time to correct all indexes above */ + err = ext4_ext_correct_indexes(handle, inode, path); + if (err) + goto cleanup; + + err = ext4_ext_dirty(handle, inode, path + depth); + +cleanup: + if (npath) { + ext4_ext_drop_refs(npath); + kfree(npath); + } + ext4_ext_tree_changed(inode); + ext4_ext_invalidate_cache(inode); + return err; +} + +int ext4_ext_walk_space(struct inode *inode, unsigned long block, + unsigned long num, ext_prepare_callback func, + void *cbdata) +{ + struct ext4_ext_path *path = NULL; + struct ext4_ext_cache cbex; + struct ext4_extent *ex; + unsigned long next, start = 0, end = 0; + unsigned long last = block + num; + int depth, exists, err = 0; + + BUG_ON(func == NULL); + BUG_ON(inode == NULL); + + while (block < last && block != EXT_MAX_BLOCK) { + num = last - block; + /* find extent for this block */ + path = ext4_ext_find_extent(inode, block, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + break; + } + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + ex = path[depth].p_ext; + next = ext4_ext_next_allocated_block(path); + + exists = 0; + if (!ex) { + /* there is no extent yet, so try to allocate + * all requested space */ + start = block; + end = block + num; + } else if (le32_to_cpu(ex->ee_block) > block) { + /* need to allocate space before found extent */ + start = block; + end = le32_to_cpu(ex->ee_block); + if (block + num < end) + end = block + num; + } else if (block >= + le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { + /* need to allocate space after found extent */ + start = block; + end = block + num; + if (end >= next) + end = next; + } else if (block >= le32_to_cpu(ex->ee_block)) { + /* + * some part of requested space is covered + * by found extent + */ + start = block; + end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); + if (block + num < end) + end = block + num; + exists = 1; + } else { + BUG(); + } + BUG_ON(end <= start); + + if (!exists) { + cbex.ec_block = start; + cbex.ec_len = end - start; + cbex.ec_start = 0; + cbex.ec_type = EXT4_EXT_CACHE_GAP; + } else { + cbex.ec_block = le32_to_cpu(ex->ee_block); + cbex.ec_len = le16_to_cpu(ex->ee_len); + cbex.ec_start = ext_pblock(ex); + cbex.ec_type = EXT4_EXT_CACHE_EXTENT; + } + + BUG_ON(cbex.ec_len == 0); + err = func(inode, path, &cbex, cbdata); + ext4_ext_drop_refs(path); + + if (err < 0) + break; + if (err == EXT_REPEAT) + continue; + else if (err == EXT_BREAK) { + err = 0; + break; + } + + if (ext_depth(inode) != depth) { + /* depth was changed. we have to realloc path */ + kfree(path); + path = NULL; + } + + block = cbex.ec_block + cbex.ec_len; + } + + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + + return err; +} + +static inline void +ext4_ext_put_in_cache(struct inode *inode, __u32 block, + __u32 len, __u32 start, int type) +{ + struct ext4_ext_cache *cex; + BUG_ON(len == 0); + cex = &EXT4_I(inode)->i_cached_extent; + cex->ec_type = type; + cex->ec_block = block; + cex->ec_len = len; + cex->ec_start = start; +} + +/* + * ext4_ext_put_gap_in_cache: + * calculate boundaries of the gap that the requested block fits into + * and cache this gap + */ +static inline void +ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, + unsigned long block) +{ + int depth = ext_depth(inode); + unsigned long lblock, len; + struct ext4_extent *ex; + + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ + lblock = 0; + len = EXT_MAX_BLOCK; + ext_debug("cache gap(whole file):"); + } else if (block < le32_to_cpu(ex->ee_block)) { + lblock = block; + len = le32_to_cpu(ex->ee_block) - block; + ext_debug("cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len)); + } else if (block >= le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len)) { + lblock = le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len); + len = ext4_ext_next_allocated_block(path); + ext_debug("cache gap(after): [%lu:%lu] %lu", + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len), + (unsigned long) block); + BUG_ON(len == lblock); + len = len - lblock; + } else { + lblock = len = 0; + BUG(); + } + + ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); + ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); +} + +static inline int +ext4_ext_in_cache(struct inode *inode, unsigned long block, + struct ext4_extent *ex) +{ + struct ext4_ext_cache *cex; + + cex = &EXT4_I(inode)->i_cached_extent; + + /* has cache valid data? */ + if (cex->ec_type == EXT4_EXT_CACHE_NO) + return EXT4_EXT_CACHE_NO; + + BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && + cex->ec_type != EXT4_EXT_CACHE_EXTENT); + if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { + ex->ee_block = cpu_to_le32(cex->ec_block); + ext4_ext_store_pblock(ex, cex->ec_start); + ex->ee_len = cpu_to_le16(cex->ec_len); + ext_debug("%lu cached by %lu:%lu:%llu\n", + (unsigned long) block, + (unsigned long) cex->ec_block, + (unsigned long) cex->ec_len, + cex->ec_start); + return cex->ec_type; + } + + /* not in cache */ + return EXT4_EXT_CACHE_NO; +} + +/* + * ext4_ext_rm_idx: + * removes index from the index block. + * It's used in truncate case only, thus all requests are for + * last index in the block only. + */ +int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + struct buffer_head *bh; + int err; + ext4_fsblk_t leaf; + + /* free index block */ + path--; + leaf = idx_pblock(path->p_idx); + BUG_ON(path->p_hdr->eh_entries == 0); + if ((err = ext4_ext_get_access(handle, inode, path))) + return err; + path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); + if ((err = ext4_ext_dirty(handle, inode, path))) + return err; + ext_debug("index is empty, remove it, free block %llu\n", leaf); + bh = sb_find_get_block(inode->i_sb, leaf); + ext4_forget(handle, 1, inode, bh, leaf); + ext4_free_blocks(handle, inode, leaf, 1); + return err; +} + +/* + * ext4_ext_calc_credits_for_insert: + * This routine returns max. credits that the extent tree can consume. + * It should be OK for low-performance paths like ->writepage() + * To allow many writing processes to fit into a single transaction, + * the caller should calculate credits under truncate_mutex and + * pass the actual path. + */ +int inline ext4_ext_calc_credits_for_insert(struct inode *inode, + struct ext4_ext_path *path) +{ + int depth, needed; + + if (path) { + /* probably there is space in leaf? */ + depth = ext_depth(inode); + if (le16_to_cpu(path[depth].p_hdr->eh_entries) + < le16_to_cpu(path[depth].p_hdr->eh_max)) + return 1; + } + + /* + * given 32-bit logical block (4294967296 blocks), max. tree + * can be 4 levels in depth -- 4 * 340^4 == 53453440000. + * Let's also add one more level for imbalance. + */ + depth = 5; + + /* allocation of new data block(s) */ + needed = 2; + + /* + * tree can be full, so it would need to grow in depth: + * allocation + old root + new root + */ + needed += 2 + 1 + 1; + + /* + * Index split can happen, we would need: + * allocate intermediate indexes (bitmap + group) + * + change two blocks at each level, but root (already included) + */ + needed = (depth * 2) + (depth * 2); + + /* any allocation modifies superblock */ + needed += 1; + + return needed; +} + +static int ext4_remove_blocks(handle_t *handle, struct inode *inode, + struct ext4_extent *ex, + unsigned long from, unsigned long to) +{ + struct buffer_head *bh; + int i; + +#ifdef EXTENTS_STATS + { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + unsigned short ee_len = le16_to_cpu(ex->ee_len); + spin_lock(&sbi->s_ext_stats_lock); + sbi->s_ext_blocks += ee_len; + sbi->s_ext_extents++; + if (ee_len < sbi->s_ext_min) + sbi->s_ext_min = ee_len; + if (ee_len > sbi->s_ext_max) + sbi->s_ext_max = ee_len; + if (ext_depth(inode) > sbi->s_depth_max) + sbi->s_depth_max = ext_depth(inode); + spin_unlock(&sbi->s_ext_stats_lock); + } +#endif + if (from >= le32_to_cpu(ex->ee_block) + && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + /* tail removal */ + unsigned long num; + ext4_fsblk_t start; + num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; + start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; + ext_debug("free last %lu blocks starting %llu\n", num, start); + for (i = 0; i < num; i++) { + bh = sb_find_get_block(inode->i_sb, start + i); + ext4_forget(handle, 0, inode, bh, start + i); + } + ext4_free_blocks(handle, inode, start, num); + } else if (from == le32_to_cpu(ex->ee_block) + && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + } else { + printk("strange request: removal(2) %lu-%lu from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + } + return 0; +} + +static int +ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, unsigned long start) +{ + int err = 0, correct_index = 0; + int depth = ext_depth(inode), credits; + struct ext4_extent_header *eh; + unsigned a, b, block, num; + unsigned long ex_ee_block; + unsigned short ex_ee_len; + struct ext4_extent *ex; + + ext_debug("truncate since %lu in leaf\n", start); + if (!path[depth].p_hdr) + path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); + eh = path[depth].p_hdr; + BUG_ON(eh == NULL); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); + + /* find where to start removing */ + ex = EXT_LAST_EXTENT(eh); + + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = le16_to_cpu(ex->ee_len); + + while (ex >= EXT_FIRST_EXTENT(eh) && + ex_ee_block + ex_ee_len > start) { + ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); + path[depth].p_ext = ex; + + a = ex_ee_block > start ? ex_ee_block : start; + b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? + ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; + + ext_debug(" border %u:%u\n", a, b); + + if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { + block = 0; + num = 0; + BUG(); + } else if (a != ex_ee_block) { + /* remove tail of the extent */ + block = ex_ee_block; + num = a - block; + } else if (b != ex_ee_block + ex_ee_len - 1) { + /* remove head of the extent */ + block = a; + num = b - a; + /* there is no "make a hole" API yet */ + BUG(); + } else { + /* remove whole extent: excellent! */ + block = ex_ee_block; + num = 0; + BUG_ON(a != ex_ee_block); + BUG_ON(b != ex_ee_block + ex_ee_len - 1); + } + + /* at present, extent can't cross block group: */ + /* leaf + bitmap + group desc + sb + inode */ + credits = 5; + if (ex == EXT_FIRST_EXTENT(eh)) { + correct_index = 1; + credits += (ext_depth(inode)) + 1; + } +#ifdef CONFIG_QUOTA + credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + handle = ext4_ext_journal_restart(handle, credits); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out; + } + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + + err = ext4_remove_blocks(handle, inode, ex, a, b); + if (err) + goto out; + + if (num == 0) { + /* this extent is removed; mark slot entirely unused */ + ext4_ext_store_pblock(ex, 0); + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + } + + ex->ee_block = cpu_to_le32(block); + ex->ee_len = cpu_to_le16(num); + + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto out; + + ext_debug("new extent: %u:%u:%llu\n", block, num, + ext_pblock(ex)); + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = le16_to_cpu(ex->ee_len); + } + + if (correct_index && eh->eh_entries) + err = ext4_ext_correct_indexes(handle, inode, path); + + /* if this leaf is free, then we should + * remove it from index block above */ + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) + err = ext4_ext_rm_idx(handle, inode, path + depth); + +out: + return err; +} + +/* + * ext4_ext_more_to_rm: + * returns 1 if current index has to be freed (even partial) + */ +static int inline +ext4_ext_more_to_rm(struct ext4_ext_path *path) +{ + BUG_ON(path->p_idx == NULL); + + if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) + return 0; + + /* + * if truncate on deeper level happened, it wasn't partial, + * so we have to consider current index for truncation + */ + if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) + return 0; + return 1; +} + +int ext4_ext_remove_space(struct inode *inode, unsigned long start) +{ + struct super_block *sb = inode->i_sb; + int depth = ext_depth(inode); + struct ext4_ext_path *path; + handle_t *handle; + int i = 0, err = 0; + + ext_debug("truncate since %lu\n", start); + + /* probably first extent we're gonna free will be last in block */ + handle = ext4_journal_start(inode, depth + 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ext4_ext_invalidate_cache(inode); + + /* + * We start scanning from right side, freeing all the blocks + * after i_size and walking into the tree depth-wise. + */ + path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); + if (path == NULL) { + ext4_journal_stop(handle); + return -ENOMEM; + } + memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); + path[0].p_hdr = ext_inode_hdr(inode); + if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) { + err = -EIO; + goto out; + } + path[0].p_depth = depth; + + while (i >= 0 && err == 0) { + if (i == depth) { + /* this is leaf block */ + err = ext4_ext_rm_leaf(handle, inode, path, start); + /* root level has p_bh == NULL, brelse() eats this */ + brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + continue; + } + + /* this is index block */ + if (!path[i].p_hdr) { + ext_debug("initialize header\n"); + path[i].p_hdr = ext_block_hdr(path[i].p_bh); + if (ext4_ext_check_header(__FUNCTION__, inode, + path[i].p_hdr)) { + err = -EIO; + goto out; + } + } + + BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries) + > le16_to_cpu(path[i].p_hdr->eh_max)); + BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC); + + if (!path[i].p_idx) { + /* this level hasn't been touched yet */ + path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; + ext_debug("init index ptr: hdr 0x%p, num %d\n", + path[i].p_hdr, + le16_to_cpu(path[i].p_hdr->eh_entries)); + } else { + /* we were already here, see at next index */ + path[i].p_idx--; + } + + ext_debug("level %d - index, first 0x%p, cur 0x%p\n", + i, EXT_FIRST_INDEX(path[i].p_hdr), + path[i].p_idx); + if (ext4_ext_more_to_rm(path + i)) { + /* go to the next level */ + ext_debug("move to level %d (block %llu)\n", + i + 1, idx_pblock(path[i].p_idx)); + memset(path + i + 1, 0, sizeof(*path)); + path[i+1].p_bh = + sb_bread(sb, idx_pblock(path[i].p_idx)); + if (!path[i+1].p_bh) { + /* should we reset i_size? */ + err = -EIO; + break; + } + + /* save actual number of indexes since this + * number is changed at the next iteration */ + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); + i++; + } else { + /* we finished processing this index, go up */ + if (path[i].p_hdr->eh_entries == 0 && i > 0) { + /* index is empty, remove it; + * handle must be already prepared by the + * truncatei_leaf() */ + err = ext4_ext_rm_idx(handle, inode, path + i); + } + /* root level has p_bh == NULL, brelse() eats this */ + brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + ext_debug("return to level %d\n", i); + } + } + + /* TODO: flexible tree reduction should be here */ + if (path->p_hdr->eh_entries == 0) { + /* + * truncate to zero freed all the tree, + * so we need to correct eh_depth + */ + err = ext4_ext_get_access(handle, inode, path); + if (err == 0) { + ext_inode_hdr(inode)->eh_depth = 0; + ext_inode_hdr(inode)->eh_max = + cpu_to_le16(ext4_ext_space_root(inode)); + err = ext4_ext_dirty(handle, inode, path); + } + } +out: + ext4_ext_tree_changed(inode); + ext4_ext_drop_refs(path); + kfree(path); + ext4_journal_stop(handle); + + return err; +} + +/* + * called at mount time + */ +void ext4_ext_init(struct super_block *sb) +{ + /* + * possible initialization would be here + */ + + if (test_opt(sb, EXTENTS)) { + printk("EXT4-fs: file extents enabled"); +#ifdef AGRESSIVE_TEST + printk(", agressive tests"); +#endif +#ifdef CHECK_BINSEARCH + printk(", check binsearch"); +#endif +#ifdef EXTENTS_STATS + printk(", stats"); +#endif + printk("\n"); +#ifdef EXTENTS_STATS + spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); + EXT4_SB(sb)->s_ext_min = 1 << 30; + EXT4_SB(sb)->s_ext_max = 0; +#endif + } +} + +/* + * called at umount time + */ +void ext4_ext_release(struct super_block *sb) +{ + if (!test_opt(sb, EXTENTS)) + return; + +#ifdef EXTENTS_STATS + if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) { + struct ext4_sb_info *sbi = EXT4_SB(sb); + printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", + sbi->s_ext_blocks, sbi->s_ext_extents, + sbi->s_ext_blocks / sbi->s_ext_extents); + printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", + sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); + } +#endif +} + +int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent newex, *ex; + ext4_fsblk_t goal, newblock; + int err = 0, depth; + unsigned long allocated = 0; + + __clear_bit(BH_New, &bh_result->b_state); + ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, + max_blocks, (unsigned) inode->i_ino); + mutex_lock(&EXT4_I(inode)->truncate_mutex); + + /* check in cache */ + if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) { + if (goal == EXT4_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user doesn't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ + } else if (goal == EXT4_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock + - le32_to_cpu(newex.ee_block) + + ext_pblock(&newex); + /* number of remaining blocks in the extent */ + allocated = le16_to_cpu(newex.ee_len) - + (iblock - le32_to_cpu(newex.ee_block)); + goto out; + } else { + BUG(); + } + } + + /* find extent for this block */ + path = ext4_ext_find_extent(inode, iblock, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out2; + } + + depth = ext_depth(inode); + + /* + * consistent leaf must not be empty; + * this situation is possible, though, _during_ tree modification; + * this is why assert can't be put in ext4_ext_find_extent() + */ + BUG_ON(path[depth].p_ext == NULL && depth != 0); + + if ((ex = path[depth].p_ext)) { + unsigned long ee_block = le32_to_cpu(ex->ee_block); + ext4_fsblk_t ee_start = ext_pblock(ex); + unsigned short ee_len = le16_to_cpu(ex->ee_len); + + /* + * Allow future support for preallocated extents to be added + * as an RO_COMPAT feature: + * Uninitialized extents are treated as holes, except that + * we avoid (fail) allocating new blocks during a write. + */ + if (ee_len > EXT_MAX_LEN) + goto out2; + /* if found extent covers block, simply return it */ + if (iblock >= ee_block && iblock < ee_block + ee_len) { + newblock = iblock - ee_block + ee_start; + /* number of remaining blocks in the extent */ + allocated = ee_len - (iblock - ee_block); + ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, + ee_block, ee_len, newblock); + ext4_ext_put_in_cache(inode, ee_block, ee_len, + ee_start, EXT4_EXT_CACHE_EXTENT); + goto out; + } + } + + /* + * requested block isn't allocated yet; + * we couldn't try to create block if create flag is zero + */ + if (!create) { + /* put just found gap into cache to speed up + * subsequent requests */ + ext4_ext_put_gap_in_cache(inode, path, iblock); + goto out2; + } + /* + * Okay, we need to do block allocation. Lazily initialize the block + * allocation info here if necessary. + */ + if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) + ext4_init_block_alloc_info(inode); + + /* allocate new block */ + goal = ext4_ext_find_goal(inode, path, iblock); + allocated = max_blocks; + newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); + if (!newblock) + goto out2; + ext_debug("allocate new block: goal %llu, found %llu/%lu\n", + goal, newblock, allocated); + + /* try to insert new extent into found leaf and return */ + newex.ee_block = cpu_to_le32(iblock); + ext4_ext_store_pblock(&newex, newblock); + newex.ee_len = cpu_to_le16(allocated); + err = ext4_ext_insert_extent(handle, inode, path, &newex); + if (err) + goto out2; + + if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = inode->i_size; + + /* previous routine could use block we allocated */ + newblock = ext_pblock(&newex); + __set_bit(BH_New, &bh_result->b_state); + + ext4_ext_put_in_cache(inode, iblock, allocated, newblock, + EXT4_EXT_CACHE_EXTENT); +out: + if (allocated > max_blocks) + allocated = max_blocks; + ext4_ext_show_leaf(inode, path); + __set_bit(BH_Mapped, &bh_result->b_state); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; +out2: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + mutex_unlock(&EXT4_I(inode)->truncate_mutex); + + return err ? err : allocated; +} + +void ext4_ext_truncate(struct inode * inode, struct page *page) +{ + struct address_space *mapping = inode->i_mapping; + struct super_block *sb = inode->i_sb; + unsigned long last_block; + handle_t *handle; + int err = 0; + + /* + * probably first extent we're gonna free will be last in block + */ + err = ext4_writepage_trans_blocks(inode) + 3; + handle = ext4_journal_start(inode, err); + if (IS_ERR(handle)) { + if (page) { + clear_highpage(page); + flush_dcache_page(page); + unlock_page(page); + page_cache_release(page); + } + return; + } + + if (page) + ext4_block_truncate_page(handle, page, mapping, inode->i_size); + + mutex_lock(&EXT4_I(inode)->truncate_mutex); + ext4_ext_invalidate_cache(inode); + + /* + * TODO: optimization is possible here. + * Probably we need not scan at all, + * because page truncation is enough. + */ + if (ext4_orphan_add(handle, inode)) + goto out_stop; + + /* we have to know where to truncate from in crash case */ + EXT4_I(inode)->i_disksize = inode->i_size; + ext4_mark_inode_dirty(handle, inode); + + last_block = (inode->i_size + sb->s_blocksize - 1) + >> EXT4_BLOCK_SIZE_BITS(sb); + err = ext4_ext_remove_space(inode, last_block); + + /* In a multi-transaction truncate, we only make the final + * transaction synchronous. */ + if (IS_SYNC(inode)) + handle->h_sync = 1; + +out_stop: + /* + * If this was a simple ftruncate() and the file will remain alive, + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext4_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + + mutex_unlock(&EXT4_I(inode)->truncate_mutex); + ext4_journal_stop(handle); +} + +/* + * ext4_ext_writepage_trans_blocks: + * calculate max number of blocks we could modify + * in order to allocate new block for an inode + */ +int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) +{ + int needed; + + needed = ext4_ext_calc_credits_for_insert(inode, NULL); + + /* caller wants to allocate num blocks, but note it includes sb */ + needed = needed * num - (num - 1); + +#ifdef CONFIG_QUOTA + needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + return needed; +} + +EXPORT_SYMBOL(ext4_mark_inode_dirty); +EXPORT_SYMBOL(ext4_ext_invalidate_cache); +EXPORT_SYMBOL(ext4_ext_insert_extent); +EXPORT_SYMBOL(ext4_ext_walk_space); +EXPORT_SYMBOL(ext4_ext_find_goal); +EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert); + diff --git a/fs/ext4/file.c b/fs/ext4/file.c new file mode 100644 index 000000000000..0b622c0624b7 --- /dev/null +++ b/fs/ext4/file.c @@ -0,0 +1,139 @@ +/* + * linux/fs/ext4/file.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext4 fs regular file handling primitives + * + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd2.h> +#include "xattr.h" +#include "acl.h" + +/* + * Called when an inode is released. Note that this is different + * from ext4_file_open: open gets called at every open, but release + * gets called only when /all/ the files are closed. + */ +static int ext4_release_file (struct inode * inode, struct file * filp) +{ + /* if we are the last writer on the inode, drop the block reservation */ + if ((filp->f_mode & FMODE_WRITE) && + (atomic_read(&inode->i_writecount) == 1)) + { + mutex_lock(&EXT4_I(inode)->truncate_mutex); + ext4_discard_reservation(inode); + mutex_unlock(&EXT4_I(inode)->truncate_mutex); + } + if (is_dx(inode) && filp->private_data) + ext4_htree_free_dir_info(filp->private_data); + + return 0; +} + +static ssize_t +ext4_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_dentry->d_inode; + ssize_t ret; + int err; + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + + /* + * Skip flushing if there was an error, or if nothing was written. + */ + if (ret <= 0) + return ret; + + /* + * If the inode is IS_SYNC, or is O_SYNC and we are doing data + * journalling then we need to make sure that we force the transaction + * to disk to keep all metadata uptodate synchronously. + */ + if (file->f_flags & O_SYNC) { + /* + * If we are non-data-journaled, then the dirty data has + * already been flushed to backing store by generic_osync_inode, + * and the inode has been flushed too if there have been any + * modifications other than mere timestamp updates. + * + * Open question --- do we care about flushing timestamps too + * if the inode is IS_SYNC? + */ + if (!ext4_should_journal_data(inode)) + return ret; + + goto force_commit; + } + + /* + * So we know that there has been no forced data flush. If the inode + * is marked IS_SYNC, we need to force one ourselves. + */ + if (!IS_SYNC(inode)) + return ret; + + /* + * Open question #2 --- should we force data to disk here too? If we + * don't, the only impact is that data=writeback filesystems won't + * flush data to disk automatically on IS_SYNC, only metadata (but + * historically, that is what ext2 has done.) + */ + +force_commit: + err = ext4_force_commit(inode->i_sb); + if (err) + return err; + return ret; +} + +const struct file_operations ext4_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = ext4_file_write, + .ioctl = ext4_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext4_compat_ioctl, +#endif + .mmap = generic_file_mmap, + .open = generic_file_open, + .release = ext4_release_file, + .fsync = ext4_sync_file, + .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; + +struct inode_operations ext4_file_inode_operations = { + .truncate = ext4_truncate, + .setattr = ext4_setattr, +#ifdef CONFIG_EXT4DEV_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext4_listxattr, + .removexattr = generic_removexattr, +#endif + .permission = ext4_permission, +}; + diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c new file mode 100644 index 000000000000..2a167d7131fa --- /dev/null +++ b/fs/ext4/fsync.c @@ -0,0 +1,88 @@ +/* + * linux/fs/ext4/fsync.c + * + * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) + * from + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * from + * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds + * + * ext4fs fsync primitive + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * Removed unnecessary code duplication for little endian machines + * and excessive __inline__s. + * Andi Kleen, 1997 + * + * Major simplications and cleanup - we only need to do the metadata, because + * we can depend on generic_block_fdatasync() to sync the data blocks. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/writeback.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd2.h> + +/* + * akpm: A new design for ext4_sync_file(). + * + * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). + * There cannot be a transaction open by this task. + * Another task could have dirtied this inode. Its data can be in any + * state in the journalling system. + * + * What we do is just kick off a commit and wait on it. This will snapshot the + * inode to disk. + */ + +int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int ret = 0; + + J_ASSERT(ext4_journal_current_handle() == 0); + + /* + * data=writeback: + * The caller's filemap_fdatawrite()/wait will sync the data. + * sync_inode() will sync the metadata + * + * data=ordered: + * The caller's filemap_fdatawrite() will write the data and + * sync_inode() will write the inode if it is dirty. Then the caller's + * filemap_fdatawait() will wait on the pages. + * + * data=journal: + * filemap_fdatawrite won't do anything (the buffers are clean). + * ext4_force_commit will write the file data into the journal and + * will wait on that. + * filemap_fdatawait() will encounter a ton of newly-dirtied pages + * (they were dirtied by commit). But that's OK - the blocks are + * safe in-journal, which is all fsync() needs to ensure. + */ + if (ext4_should_journal_data(inode)) { + ret = ext4_force_commit(inode->i_sb); + goto out; + } + + /* + * The VFS has written the file data. If the inode is unaltered + * then we need not start a commit. + */ + if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* sys_fsync did this */ + }; + ret = sync_inode(inode, &wbc); + } +out: + return ret; +} diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c new file mode 100644 index 000000000000..a67966385e06 --- /dev/null +++ b/fs/ext4/hash.c @@ -0,0 +1,152 @@ +/* + * linux/fs/ext4/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This file is released under the GPL v2. + * + * This file may be redistributed under the terms of the GNU Public + * License. + */ + +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/sched.h> +#include <linux/ext4_fs.h> +#include <linux/cryptohash.h> + +#define DELTA 0x9E3779B9 + +static void TEA_transform(__u32 buf[4], __u32 const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while(--n); + + buf[0] += b0; + buf[1] += b1; +} + + +/* The old legacy hash */ +static __u32 dx_hack_hash (const char *name, int len) +{ + __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + while (len--) { + __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); + + if (hash & 0x80000000) hash -= 0x7fffffff; + hash1 = hash0; + hash0 = hash; + } + return (hash0 << 1); +} + +static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) +{ + __u32 pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num*4) + len = num * 4; + for (i=0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +/* + * Returns the hash of a filename. If len is 0 and name is NULL, then + * this function can be used to test whether or not a hash version is + * supported. + * + * The seed is an 4 longword (32 bits) "secret" which can be used to + * uniquify a hash. If the seed is all zero's, then some default seed + * may be used. + * + * A particular hash version specifies whether or not the seed is + * represented, and whether or not the returned hash is 32 bits or 64 + * bits. 32 bit hashes will return 0 for the minor hash. + */ +int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) +{ + __u32 hash; + __u32 minor_hash = 0; + const char *p; + int i; + __u32 in[8], buf[4]; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + /* Check to see if the seed is all zero's */ + if (hinfo->seed) { + for (i=0; i < 4; i++) { + if (hinfo->seed[i]) + break; + } + if (i < 4) + memcpy(buf, hinfo->seed, sizeof(buf)); + } + + switch (hinfo->hash_version) { + case DX_HASH_LEGACY: + hash = dx_hack_hash(name, len); + break; + case DX_HASH_HALF_MD4: + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 8); + half_md4_transform(buf, in); + len -= 32; + p += 32; + } + minor_hash = buf[2]; + hash = buf[1]; + break; + case DX_HASH_TEA: + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + len -= 16; + p += 16; + } + hash = buf[0]; + minor_hash = buf[1]; + break; + default: + hinfo->hash = 0; + return -1; + } + hash = hash & ~1; + if (hash == (EXT4_HTREE_EOF << 1)) + hash = (EXT4_HTREE_EOF-1) << 1; + hinfo->hash = hash; + hinfo->minor_hash = minor_hash; + return 0; +} diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c new file mode 100644 index 000000000000..c88b439ba5cd --- /dev/null +++ b/fs/ext4/ialloc.c @@ -0,0 +1,772 @@ +/* + * linux/fs/ext4/ialloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * BSD ufs-inspired inode and directory allocation by + * Stephen Tweedie (sct@redhat.com), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd2.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> +#include <linux/random.h> +#include <linux/bitops.h> +#include <linux/blkdev.h> +#include <asm/byteorder.h> + +#include "xattr.h" +#include "acl.h" + +/* + * ialloc.c contains the inodes allocation and deallocation routines + */ + +/* + * The free inodes are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. + */ + + +/* + * Read the inode allocation bitmap for a given block_group, reading + * into the specified slot in the superblock's bitmap cache. + * + * Return buffer_head of bitmap on success or NULL. + */ +static struct buffer_head * +read_inode_bitmap(struct super_block * sb, unsigned long block_group) +{ + struct ext4_group_desc *desc; + struct buffer_head *bh = NULL; + + desc = ext4_get_group_desc(sb, block_group, NULL); + if (!desc) + goto error_out; + + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + if (!bh) + ext4_error(sb, "read_inode_bitmap", + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %llu", + block_group, ext4_inode_bitmap(sb, desc)); +error_out: + return bh; +} + +/* + * NOTE! When we get the inode, we're the only people + * that have access to it, and as such there are no + * race conditions we have to worry about. The inode + * is not on the hash-lists, and it cannot be reached + * through the filesystem because the directory entry + * has been deleted earlier. + * + * HOWEVER: we must make sure that we get no aliases, + * which means that we have to call "clear_inode()" + * _before_ we mark the inode not in use in the inode + * bitmaps. Otherwise a newly created file might use + * the same inode number (not actually the same pointer + * though), and then we'd have two inodes sharing the + * same inode number and space on the harddisk. + */ +void ext4_free_inode (handle_t *handle, struct inode * inode) +{ + struct super_block * sb = inode->i_sb; + int is_directory; + unsigned long ino; + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bh2; + unsigned long block_group; + unsigned long bit; + struct ext4_group_desc * gdp; + struct ext4_super_block * es; + struct ext4_sb_info *sbi; + int fatal = 0, err; + + if (atomic_read(&inode->i_count) > 1) { + printk ("ext4_free_inode: inode has count=%d\n", + atomic_read(&inode->i_count)); + return; + } + if (inode->i_nlink) { + printk ("ext4_free_inode: inode has nlink=%d\n", + inode->i_nlink); + return; + } + if (!sb) { + printk("ext4_free_inode: inode on nonexistent device\n"); + return; + } + sbi = EXT4_SB(sb); + + ino = inode->i_ino; + ext4_debug ("freeing inode %lu\n", ino); + + /* + * Note: we must free any quota before locking the superblock, + * as writing the quota to disk may need the lock as well. + */ + DQUOT_INIT(inode); + ext4_xattr_delete_inode(handle, inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + + is_directory = S_ISDIR(inode->i_mode); + + /* Do this BEFORE marking the inode not in use or returning an error */ + clear_inode (inode); + + es = EXT4_SB(sb)->s_es; + if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext4_error (sb, "ext4_free_inode", + "reserved or nonexistent inode %lu", ino); + goto error_return; + } + block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); + bitmap_bh = read_inode_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + + BUFFER_TRACE(bitmap_bh, "get_write_access"); + fatal = ext4_journal_get_write_access(handle, bitmap_bh); + if (fatal) + goto error_return; + + /* Ok, now we can actually update the inode bitmaps.. */ + if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + bit, bitmap_bh->b_data)) + ext4_error (sb, "ext4_free_inode", + "bit already cleared for inode %lu", ino); + else { + gdp = ext4_get_group_desc (sb, block_group, &bh2); + + BUFFER_TRACE(bh2, "get_write_access"); + fatal = ext4_journal_get_write_access(handle, bh2); + if (fatal) goto error_return; + + if (gdp) { + spin_lock(sb_bgl_lock(sbi, block_group)); + gdp->bg_free_inodes_count = cpu_to_le16( + le16_to_cpu(gdp->bg_free_inodes_count) + 1); + if (is_directory) + gdp->bg_used_dirs_count = cpu_to_le16( + le16_to_cpu(gdp->bg_used_dirs_count) - 1); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_inc(&sbi->s_freeinodes_counter); + if (is_directory) + percpu_counter_dec(&sbi->s_dirs_counter); + + } + BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh2); + if (!fatal) fatal = err; + } + BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); + if (!fatal) + fatal = err; + sb->s_dirt = 1; +error_return: + brelse(bitmap_bh); + ext4_std_error(sb, fatal); +} + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory\'s block + * group to find a free inode. + */ +static int find_group_dir(struct super_block *sb, struct inode *parent) +{ + int ngroups = EXT4_SB(sb)->s_groups_count; + unsigned int freei, avefreei; + struct ext4_group_desc *desc, *best_desc = NULL; + struct buffer_head *bh; + int group, best_group = -1; + + freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); + avefreei = freei / ngroups; + + for (group = 0; group < ngroups; group++) { + desc = ext4_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + continue; + if (!best_desc || + (le16_to_cpu(desc->bg_free_blocks_count) > + le16_to_cpu(best_desc->bg_free_blocks_count))) { + best_group = group; + best_desc = desc; + } + } + return best_group; +} + +/* + * Orlov's allocator for directories. + * + * We always try to spread first-level directories. + * + * If there are blockgroups with both free inodes and free blocks counts + * not worse than average we return one with smallest directory count. + * Otherwise we simply return a random group. + * + * For the rest rules look so: + * + * It's OK to put directory into a group unless + * it has too many directories already (max_dirs) or + * it has too few free inodes left (min_inodes) or + * it has too few free blocks left (min_blocks) or + * it's already running too large debt (max_debt). + * Parent's group is prefered, if it doesn't satisfy these + * conditions we search cyclically through the rest. If none + * of the groups look good we just look for a group with more + * free inodes than average (starting at parent's group). + * + * Debt is incremented each time we allocate a directory and decremented + * when we allocate an inode, within 0--255. + */ + +#define INODE_COST 64 +#define BLOCK_COST 256 + +static int find_group_orlov(struct super_block *sb, struct inode *parent) +{ + int parent_group = EXT4_I(parent)->i_block_group; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int ngroups = sbi->s_groups_count; + int inodes_per_group = EXT4_INODES_PER_GROUP(sb); + unsigned int freei, avefreei; + ext4_fsblk_t freeb, avefreeb; + ext4_fsblk_t blocks_per_dir; + unsigned int ndirs; + int max_debt, max_dirs, min_inodes; + ext4_grpblk_t min_blocks; + int group = -1, i; + struct ext4_group_desc *desc; + struct buffer_head *bh; + + freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); + avefreei = freei / ngroups; + freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + avefreeb = freeb; + do_div(avefreeb, ngroups); + ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); + + if ((parent == sb->s_root->d_inode) || + (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { + int best_ndir = inodes_per_group; + int best_group = -1; + + get_random_bytes(&group, sizeof(group)); + parent_group = (unsigned)group % ngroups; + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + continue; + if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) + continue; + best_group = group; + best_ndir = le16_to_cpu(desc->bg_used_dirs_count); + } + if (best_group >= 0) + return best_group; + goto fallback; + } + + blocks_per_dir = ext4_blocks_count(es) - freeb; + do_div(blocks_per_dir, ndirs); + + max_dirs = ndirs / ngroups + inodes_per_group / 16; + min_inodes = avefreei - inodes_per_group / 4; + min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; + + max_debt = EXT4_BLOCKS_PER_GROUP(sb); + max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); + if (max_debt * INODE_COST > inodes_per_group) + max_debt = inodes_per_group / INODE_COST; + if (max_debt > 255) + max_debt = 255; + if (max_debt == 0) + max_debt = 1; + + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) + continue; + if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) + continue; + return group; + } + +fallback: + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + return group; + } + + if (avefreei) { + /* + * The free-inodes counter is approximate, and for really small + * filesystems the above test can fail to find any blockgroups + */ + avefreei = 0; + goto fallback; + } + + return -1; +} + +static int find_group_other(struct super_block *sb, struct inode *parent) +{ + int parent_group = EXT4_I(parent)->i_block_group; + int ngroups = EXT4_SB(sb)->s_groups_count; + struct ext4_group_desc *desc; + struct buffer_head *bh; + int group, i; + + /* + * Try to place the inode in its parent directory + */ + group = parent_group; + desc = ext4_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count) && + le16_to_cpu(desc->bg_free_blocks_count)) + return group; + + /* + * We're going to place this inode in a different blockgroup from its + * parent. We want to cause files in a common directory to all land in + * the same blockgroup. But we want files which are in a different + * directory which shares a blockgroup with our parent to land in a + * different blockgroup. + * + * So add our directory's i_ino into the starting point for the hash. + */ + group = (group + parent->i_ino) % ngroups; + + /* + * Use a quadratic hash to find a group with a free inode and some free + * blocks. + */ + for (i = 1; i < ngroups; i <<= 1) { + group += i; + if (group >= ngroups) + group -= ngroups; + desc = ext4_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count) && + le16_to_cpu(desc->bg_free_blocks_count)) + return group; + } + + /* + * That failed: try linear search for a free inode, even if that group + * has no free blocks. + */ + group = parent_group; + for (i = 0; i < ngroups; i++) { + if (++group >= ngroups) + group = 0; + desc = ext4_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + return group; + } + + return -1; +} + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory's block + * group to find a free inode. + */ +struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) +{ + struct super_block *sb; + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bh2; + int group; + unsigned long ino = 0; + struct inode * inode; + struct ext4_group_desc * gdp = NULL; + struct ext4_super_block * es; + struct ext4_inode_info *ei; + struct ext4_sb_info *sbi; + int err = 0; + struct inode *ret; + int i; + + /* Cannot create files in a deleted directory */ + if (!dir || !dir->i_nlink) + return ERR_PTR(-EPERM); + + sb = dir->i_sb; + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + ei = EXT4_I(inode); + + sbi = EXT4_SB(sb); + es = sbi->s_es; + if (S_ISDIR(mode)) { + if (test_opt (sb, OLDALLOC)) + group = find_group_dir(sb, dir); + else + group = find_group_orlov(sb, dir); + } else + group = find_group_other(sb, dir); + + err = -ENOSPC; + if (group == -1) + goto out; + + for (i = 0; i < sbi->s_groups_count; i++) { + err = -EIO; + + gdp = ext4_get_group_desc(sb, group, &bh2); + if (!gdp) + goto fail; + + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) + goto fail; + + ino = 0; + +repeat_in_this_group: + ino = ext4_find_next_zero_bit((unsigned long *) + bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); + if (ino < EXT4_INODES_PER_GROUP(sb)) { + + BUFFER_TRACE(bitmap_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, bitmap_bh); + if (err) + goto fail; + + if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), + ino, bitmap_bh->b_data)) { + /* we won it */ + BUFFER_TRACE(bitmap_bh, + "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, + bitmap_bh); + if (err) + goto fail; + goto got; + } + /* we lost it */ + jbd2_journal_release_buffer(handle, bitmap_bh); + + if (++ino < EXT4_INODES_PER_GROUP(sb)) + goto repeat_in_this_group; + } + + /* + * This case is possible in concurrent environment. It is very + * rare. We cannot repeat the find_group_xxx() call because + * that will simply return the same blockgroup, because the + * group descriptor metadata has not yet been updated. + * So we just go onto the next blockgroup. + */ + if (++group == sbi->s_groups_count) + group = 0; + } + err = -ENOSPC; + goto out; + +got: + ino += group * EXT4_INODES_PER_GROUP(sb) + 1; + if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext4_error (sb, "ext4_new_inode", + "reserved inode or inode > inodes count - " + "block_group = %d, inode=%lu", group, ino); + err = -EIO; + goto fail; + } + + BUFFER_TRACE(bh2, "get_write_access"); + err = ext4_journal_get_write_access(handle, bh2); + if (err) goto fail; + spin_lock(sb_bgl_lock(sbi, group)); + gdp->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + if (S_ISDIR(mode)) { + gdp->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + } + spin_unlock(sb_bgl_lock(sbi, group)); + BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh2); + if (err) goto fail; + + percpu_counter_dec(&sbi->s_freeinodes_counter); + if (S_ISDIR(mode)) + percpu_counter_inc(&sbi->s_dirs_counter); + sb->s_dirt = 1; + + inode->i_uid = current->fsuid; + if (test_opt (sb, GRPID)) + inode->i_gid = dir->i_gid; + else if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + inode->i_mode = mode; + + inode->i_ino = ino; + /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + + memset(ei->i_data, 0, sizeof(ei->i_data)); + ei->i_dir_start_lookup = 0; + ei->i_disksize = 0; + + ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL; + if (S_ISLNK(mode)) + ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); + /* dirsync only applies to directories */ + if (!S_ISDIR(mode)) + ei->i_flags &= ~EXT4_DIRSYNC_FL; +#ifdef EXT4_FRAGMENTS + ei->i_faddr = 0; + ei->i_frag_no = 0; + ei->i_frag_size = 0; +#endif + ei->i_file_acl = 0; + ei->i_dir_acl = 0; + ei->i_dtime = 0; + ei->i_block_alloc_info = NULL; + ei->i_block_group = group; + + ext4_set_inode_flags(inode); + if (IS_DIRSYNC(inode)) + handle->h_sync = 1; + insert_inode_hash(inode); + spin_lock(&sbi->s_next_gen_lock); + inode->i_generation = sbi->s_next_generation++; + spin_unlock(&sbi->s_next_gen_lock); + + ei->i_state = EXT4_STATE_NEW; + ei->i_extra_isize = + (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ? + sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0; + + ret = inode; + if(DQUOT_ALLOC_INODE(inode)) { + err = -EDQUOT; + goto fail_drop; + } + + err = ext4_init_acl(handle, inode, dir); + if (err) + goto fail_free_drop; + + err = ext4_init_security(handle,inode, dir); + if (err) + goto fail_free_drop; + + err = ext4_mark_inode_dirty(handle, inode); + if (err) { + ext4_std_error(sb, err); + goto fail_free_drop; + } + if (test_opt(sb, EXTENTS)) { + EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; + ext4_ext_tree_init(handle, inode); + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) goto fail; + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS); + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + } + } + + ext4_debug("allocating inode %lu\n", inode->i_ino); + goto really_out; +fail: + ext4_std_error(sb, err); +out: + iput(inode); + ret = ERR_PTR(err); +really_out: + brelse(bitmap_bh); + return ret; + +fail_free_drop: + DQUOT_FREE_INODE(inode); + +fail_drop: + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; + inode->i_nlink = 0; + iput(inode); + brelse(bitmap_bh); + return ERR_PTR(err); +} + +/* Verify that we are loading a valid orphan from disk */ +struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) +{ + unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); + unsigned long block_group; + int bit; + struct buffer_head *bitmap_bh = NULL; + struct inode *inode = NULL; + + /* Error cases - e2fsck has already cleaned up for us */ + if (ino > max_ino) { + ext4_warning(sb, __FUNCTION__, + "bad orphan ino %lu! e2fsck was run?", ino); + goto out; + } + + block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); + bitmap_bh = read_inode_bitmap(sb, block_group); + if (!bitmap_bh) { + ext4_warning(sb, __FUNCTION__, + "inode bitmap error for orphan %lu", ino); + goto out; + } + + /* Having the inode bit set should be a 100% indicator that this + * is a valid orphan (no e2fsck run on fs). Orphans also include + * inodes that were being truncated, so we can't check i_nlink==0. + */ + if (!ext4_test_bit(bit, bitmap_bh->b_data) || + !(inode = iget(sb, ino)) || is_bad_inode(inode) || + NEXT_ORPHAN(inode) > max_ino) { + ext4_warning(sb, __FUNCTION__, + "bad orphan inode %lu! e2fsck was run?", ino); + printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext4_test_bit(bit, bitmap_bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%lu\n", max_ino); + } + /* Avoid freeing blocks if we got a bad deleted inode */ + if (inode && inode->i_nlink == 0) + inode->i_blocks = 0; + iput(inode); + inode = NULL; + } +out: + brelse(bitmap_bh); + return inode; +} + +unsigned long ext4_count_free_inodes (struct super_block * sb) +{ + unsigned long desc_count; + struct ext4_group_desc *gdp; + int i; +#ifdef EXT4FS_DEBUG + struct ext4_super_block *es; + unsigned long bitmap_count, x; + struct buffer_head *bitmap_bh = NULL; + + es = EXT4_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + gdp = ext4_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, i); + if (!bitmap_bh) + continue; + + x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); + printk("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(gdp->bg_free_inodes_count), x); + bitmap_count += x; + } + brelse(bitmap_bh); + printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", + le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); + return desc_count; +#else + desc_count = 0; + for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + gdp = ext4_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + cond_resched(); + } + return desc_count; +#endif +} + +/* Called at mount-time, super-block is locked */ +unsigned long ext4_count_dirs (struct super_block * sb) +{ + unsigned long count = 0; + int i; + + for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + count += le16_to_cpu(gdp->bg_used_dirs_count); + } + return count; +} + diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c new file mode 100644 index 000000000000..0a60ec5a16db --- /dev/null +++ b/fs/ext4/inode.c @@ -0,0 +1,3233 @@ +/* + * linux/fs/ext4/inode.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie + * (sct@redhat.com), 1993, 1998 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + * + * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/ext4_jbd2.h> +#include <linux/jbd2.h> +#include <linux/smp_lock.h> +#include <linux/highuid.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> +#include <linux/string.h> +#include <linux/buffer_head.h> +#include <linux/writeback.h> +#include <linux/mpage.h> +#include <linux/uio.h> +#include <linux/bio.h> +#include "xattr.h" +#include "acl.h" + +/* + * Test whether an inode is a fast symlink. + */ +static int ext4_inode_is_fast_symlink(struct inode *inode) +{ + int ea_blocks = EXT4_I(inode)->i_file_acl ? + (inode->i_sb->s_blocksize >> 9) : 0; + + return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); +} + +/* + * The ext4 forget function must perform a revoke if we are freeing data + * which has been journaled. Metadata (eg. indirect blocks) must be + * revoked in all cases. + * + * "bh" may be NULL: a metadata block may have been freed from memory + * but there may still be a record of it in the journal, and that record + * still needs to be revoked. + */ +int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t blocknr) +{ + int err; + + might_sleep(); + + BUFFER_TRACE(bh, "enter"); + + jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " + "data mode %lx\n", + bh, is_metadata, inode->i_mode, + test_opt(inode->i_sb, DATA_FLAGS)); + + /* Never use the revoke function if we are doing full data + * journaling: there is no need to, and a V1 superblock won't + * support it. Otherwise, only skip the revoke on un-journaled + * data blocks. */ + + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (!is_metadata && !ext4_should_journal_data(inode))) { + if (bh) { + BUFFER_TRACE(bh, "call jbd2_journal_forget"); + return ext4_journal_forget(handle, bh); + } + return 0; + } + + /* + * data!=journal && (is_metadata || should_journal_data(inode)) + */ + BUFFER_TRACE(bh, "call ext4_journal_revoke"); + err = ext4_journal_revoke(handle, blocknr, bh); + if (err) + ext4_abort(inode->i_sb, __FUNCTION__, + "error %d when attempting revoke", err); + BUFFER_TRACE(bh, "exit"); + return err; +} + +/* + * Work out how many blocks we need to proceed with the next chunk of a + * truncate transaction. + */ +static unsigned long blocks_for_truncate(struct inode *inode) +{ + unsigned long needed; + + needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); + + /* Give ourselves just enough room to cope with inodes in which + * i_blocks is corrupt: we've seen disk corruptions in the past + * which resulted in random data in an inode which looked enough + * like a regular file for ext4 to try to delete it. Things + * will go a bit crazy if that happens, but at least we should + * try not to panic the whole kernel. */ + if (needed < 2) + needed = 2; + + /* But we need to bound the transaction so we don't overflow the + * journal. */ + if (needed > EXT4_MAX_TRANS_DATA) + needed = EXT4_MAX_TRANS_DATA; + + return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; +} + +/* + * Truncate transactions can be complex and absolutely huge. So we need to + * be able to restart the transaction at a conventient checkpoint to make + * sure we don't overflow the journal. + * + * start_transaction gets us a new handle for a truncate transaction, + * and extend_transaction tries to extend the existing one a bit. If + * extend fails, we need to propagate the failure up and restart the + * transaction in the top-level truncate loop. --sct + */ +static handle_t *start_transaction(struct inode *inode) +{ + handle_t *result; + + result = ext4_journal_start(inode, blocks_for_truncate(inode)); + if (!IS_ERR(result)) + return result; + + ext4_std_error(inode->i_sb, PTR_ERR(result)); + return result; +} + +/* + * Try to extend this transaction for the purposes of truncation. + * + * Returns 0 if we managed to create more room. If we can't create more + * room, and the transaction must be restarted we return 1. + */ +static int try_to_extend_transaction(handle_t *handle, struct inode *inode) +{ + if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) + return 0; + if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) + return 0; + return 1; +} + +/* + * Restart the transaction associated with *handle. This does a commit, + * so before we call here everything must be consistently dirtied against + * this transaction. + */ +static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) +{ + jbd_debug(2, "restarting handle %p\n", handle); + return ext4_journal_restart(handle, blocks_for_truncate(inode)); +} + +/* + * Called at the last iput() if i_nlink is zero. + */ +void ext4_delete_inode (struct inode * inode) +{ + handle_t *handle; + + truncate_inode_pages(&inode->i_data, 0); + + if (is_bad_inode(inode)) + goto no_delete; + + handle = start_transaction(inode); + if (IS_ERR(handle)) { + /* + * If we're going to skip the normal cleanup, we still need to + * make sure that the in-core orphan linked list is properly + * cleaned up. + */ + ext4_orphan_del(NULL, inode); + goto no_delete; + } + + if (IS_SYNC(inode)) + handle->h_sync = 1; + inode->i_size = 0; + if (inode->i_blocks) + ext4_truncate(inode); + /* + * Kill off the orphan record which ext4_truncate created. + * AKPM: I think this can be inside the above `if'. + * Note that ext4_orphan_del() has to be able to cope with the + * deletion of a non-existent orphan - this is because we don't + * know if ext4_truncate() actually created an orphan record. + * (Well, we could do this if we need to, but heck - it works) + */ + ext4_orphan_del(handle, inode); + EXT4_I(inode)->i_dtime = get_seconds(); + + /* + * One subtle ordering requirement: if anything has gone wrong + * (transaction abort, IO errors, whatever), then we can still + * do these next steps (the fs will already have been marked as + * having errors), but we can't free the inode if the mark_dirty + * fails. + */ + if (ext4_mark_inode_dirty(handle, inode)) + /* If that failed, just do the required in-core inode clear. */ + clear_inode(inode); + else + ext4_free_inode(handle, inode); + ext4_journal_stop(handle); + return; +no_delete: + clear_inode(inode); /* We must guarantee clearing of inode... */ +} + +typedef struct { + __le32 *p; + __le32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +static int verify_chain(Indirect *from, Indirect *to) +{ + while (from <= to && from->key == *from->p) + from++; + return (from > to); +} + +/** + * ext4_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. + * + * To store the locations of file's data ext4 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. + * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). + */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. + */ + +static int ext4_block_to_path(struct inode *inode, + long i_block, int offsets[4], int *boundary) +{ + int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT4_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + int final = 0; + + if (i_block < 0) { + ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); + } else if (i_block < direct_blocks) { + offsets[n++] = i_block; + final = direct_blocks; + } else if ( (i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT4_IND_BLOCK; + offsets[n++] = i_block; + final = ptrs; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT4_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT4_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else { + ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big"); + } + if (boundary) + *boundary = final - 1 - (i_block & (ptrs - 1)); + return n; +} + +/** + * ext4_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) + * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples <key, p, bh> and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. + * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it notices that chain had been changed while it was reading + * (ditto, *@err == -EAGAIN) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). + */ +static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, + Indirect chain[4], int *err) +{ + struct super_block *sb = inode->i_sb; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = sb_bread(sb, le32_to_cpu(p->key)); + if (!bh) + goto failure; + /* Reader: pointers */ + if (!verify_chain(chain, p)) + goto changed; + add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; + } + return NULL; + +changed: + brelse(bh); + *err = -EAGAIN; + goto no_block; +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext4_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. + * + * This function returns the prefered place for block allocation. + * It is used when heuristic for sequential allocation fails. + * Rules are: + * + if there is a block to the left of our position - allocate near it. + * + if pointer will live in indirect block - allocate near that block. + * + if pointer will live in inode - allocate in the same + * cylinder group. + * + * In the latter case we colour the starting block by the callers PID to + * prevent it from clashing with concurrent allocations for a different inode + * in the same block group. The PID is used here so that functionally related + * files will be close-by on-disk. + * + * Caller must make sure that @ind is valid and will stay that way. + */ +static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; + __le32 *p; + ext4_fsblk_t bg_start; + ext4_grpblk_t colour; + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) { + if (*p) + return le32_to_cpu(*p); + } + + /* No such thing, so let's try location of indirect block */ + if (ind->bh) + return ind->bh->b_blocknr; + + /* + * It is going to be referred to from the inode itself? OK, just put it + * into the same cylinder group then. + */ + bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + return bg_start + colour; +} + +/** + * ext4_find_goal - find a prefered place for allocation. + * @inode: owner + * @block: block we want + * @chain: chain of indirect blocks + * @partial: pointer to the last triple within a chain + * @goal: place to store the result. + * + * Normally this function find the prefered place for block allocation, + * stores it in *@goal and returns zero. + */ + +static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block, + Indirect chain[4], Indirect *partial) +{ + struct ext4_block_alloc_info *block_i; + + block_i = EXT4_I(inode)->i_block_alloc_info; + + /* + * try the heuristic for sequential allocation, + * failing that at least try to get decent locality. + */ + if (block_i && (block == block_i->last_alloc_logical_block + 1) + && (block_i->last_alloc_physical_block != 0)) { + return block_i->last_alloc_physical_block + 1; + } + + return ext4_find_near(inode, partial); +} + +/** + * ext4_blks_to_allocate: Look up the block map and count the number + * of direct blocks need to be allocated for the given branch. + * + * @branch: chain of indirect blocks + * @k: number of blocks need for indirect blocks + * @blks: number of data blocks to be mapped. + * @blocks_to_boundary: the offset in the indirect block + * + * return the total number of blocks to be allocate, including the + * direct and indirect blocks. + */ +static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks, + int blocks_to_boundary) +{ + unsigned long count = 0; + + /* + * Simple case, [t,d]Indirect block(s) has not allocated yet + * then it's clear blocks on that path have not allocated + */ + if (k > 0) { + /* right now we don't handle cross boundary allocation */ + if (blks < blocks_to_boundary + 1) + count += blks; + else + count += blocks_to_boundary + 1; + return count; + } + + count++; + while (count < blks && count <= blocks_to_boundary && + le32_to_cpu(*(branch[0].p + count)) == 0) { + count++; + } + return count; +} + +/** + * ext4_alloc_blocks: multiple allocate blocks needed for a branch + * @indirect_blks: the number of blocks need to allocate for indirect + * blocks + * + * @new_blocks: on return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block, + * @blks: on return it will store the total number of allocated + * direct blocks + */ +static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int indirect_blks, int blks, + ext4_fsblk_t new_blocks[4], int *err) +{ + int target, i; + unsigned long count = 0; + int index = 0; + ext4_fsblk_t current_block = 0; + int ret = 0; + + /* + * Here we try to allocate the requested multiple blocks at once, + * on a best-effort basis. + * To build a branch, we should allocate blocks for + * the indirect blocks(if not allocated yet), and at least + * the first direct block of this branch. That's the + * minimum number of blocks need to allocate(required) + */ + target = blks + indirect_blks; + + while (1) { + count = target; + /* allocating blocks for indirect blocks and direct blocks */ + current_block = ext4_new_blocks(handle,inode,goal,&count,err); + if (*err) + goto failed_out; + + target -= count; + /* allocate blocks for indirect blocks */ + while (index < indirect_blks && count) { + new_blocks[index++] = current_block++; + count--; + } + + if (count > 0) + break; + } + + /* save the new block number for the first direct block */ + new_blocks[index] = current_block; + + /* total number of blocks allocated for direct blocks */ + ret = count; + *err = 0; + return ret; +failed_out: + for (i = 0; i <index; i++) + ext4_free_blocks(handle, inode, new_blocks[i], 1); + return ret; +} + +/** + * ext4_alloc_branch - allocate and set up a chain of blocks. + * @inode: owner + * @indirect_blks: number of allocated indirect blocks + * @blks: number of allocated direct blocks + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. + * + * This function allocates blocks, zeroes out all but the last one, + * links them into chain and (if we are synchronous) writes them to disk. + * In other words, it prepares a branch that can be spliced onto the + * inode. It stores the information about that chain in the branch[], in + * the same format as ext4_get_branch() would do. We are calling it after + * we had read the existing part of chain and partial points to the last + * triple of that (one with zero ->key). Upon the exit we have the same + * picture as after the successful ext4_get_block(), except that in one + * place chain is disconnected - *branch->p is still zero (we did not + * set the last link), but branch->key contains the number that should + * be placed into *branch->p to fill that gap. + * + * If allocation fails we free all blocks we've allocated (and forget + * their buffer_heads) and return the error value the from failed + * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ +static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + int indirect_blks, int *blks, ext4_fsblk_t goal, + int *offsets, Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int i, n = 0; + int err = 0; + struct buffer_head *bh; + int num; + ext4_fsblk_t new_blocks[4]; + ext4_fsblk_t current_block; + + num = ext4_alloc_blocks(handle, inode, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) + return err; + + branch[0].key = cpu_to_le32(new_blocks[0]); + /* + * metadata blocks and data blocks are allocated. + */ + for (n = 1; n <= indirect_blks; n++) { + /* + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk. + */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + branch[n].bh = bh; + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + err = ext4_journal_get_create_access(handle, bh); + if (err) { + unlock_buffer(bh); + brelse(bh); + goto failed; + } + + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); + *branch[n].p = branch[n].key; + if ( n == indirect_blks) { + current_block = new_blocks[n]; + /* + * End of chain, update the last new metablock of + * the chain to point to the new allocated + * data blocks numbers + */ + for (i=1; i < num; i++) + *(branch[n].p + i) = cpu_to_le32(++current_block); + } + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh); + if (err) + goto failed; + } + *blks = num; + return err; +failed: + /* Allocation failed, free what we already allocated */ + for (i = 1; i <= n ; i++) { + BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); + ext4_journal_forget(handle, branch[i].bh); + } + for (i = 0; i <indirect_blks; i++) + ext4_free_blocks(handle, inode, new_blocks[i], 1); + + ext4_free_blocks(handle, inode, new_blocks[i], num); + + return err; +} + +/** + * ext4_splice_branch - splice the allocated branch onto inode. + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext4_alloc_branch) + * @where: location of missing link + * @num: number of indirect blocks we are adding + * @blks: number of direct blocks we are adding + * + * This function fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. + */ +static int ext4_splice_branch(handle_t *handle, struct inode *inode, + long block, Indirect *where, int num, int blks) +{ + int i; + int err = 0; + struct ext4_block_alloc_info *block_i; + ext4_fsblk_t current_block; + + block_i = EXT4_I(inode)->i_block_alloc_info; + /* + * If we're splicing into a [td]indirect block (as opposed to the + * inode) then we need to get write access to the [td]indirect block + * before the splice. + */ + if (where->bh) { + BUFFER_TRACE(where->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, where->bh); + if (err) + goto err_out; + } + /* That's it */ + + *where->p = where->key; + + /* + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ + if (num == 0 && blks > 1) { + current_block = le32_to_cpu(where->key) + 1; + for (i = 1; i < blks; i++) + *(where->p + i ) = cpu_to_le32(current_block++); + } + + /* + * update the most recently allocated logical & physical block + * in i_block_alloc_info, to assist find the proper goal block for next + * allocation + */ + if (block_i) { + block_i->last_alloc_logical_block = block + blks - 1; + block_i->last_alloc_physical_block = + le32_to_cpu(where[num].key) + blks - 1; + } + + /* We are done with atomic stuff, now do the rest of housekeeping */ + + inode->i_ctime = CURRENT_TIME_SEC; + ext4_mark_inode_dirty(handle, inode); + + /* had we spliced it onto indirect block? */ + if (where->bh) { + /* + * If we spliced it onto an indirect block, we haven't + * altered the inode. Note however that if it is being spliced + * onto an indirect block at the very end of the file (the + * file is growing) then we *will* alter the inode to reflect + * the new i_size. But that is not done here - it is done in + * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. + */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + * Inode was dirtied above. + */ + jbd_debug(5, "splicing direct\n"); + } + return err; + +err_out: + for (i = 1; i <= num; i++) { + BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); + ext4_journal_forget(handle, where[i].bh); + ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); + } + ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); + + return err; +} + +/* + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + * + * `handle' can be NULL if create == 0. + * + * The BKL may not be held on entry here. Be sure to take it early. + * return > 0, # of blocks mapped or allocated. + * return = 0, if plain lookup failed. + * return < 0, error case. + */ +int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, + sector_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, + int create, int extend_disksize) +{ + int err = -EIO; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + ext4_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; + struct ext4_inode_info *ei = EXT4_I(inode); + int count = 0; + ext4_fsblk_t first_block = 0; + + + J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); + J_ASSERT(handle != NULL || create == 0); + depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary); + + if (depth == 0) + goto out; + + partial = ext4_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { + first_block = le32_to_cpu(chain[depth - 1].key); + clear_buffer_new(bh_result); + count++; + /*map more blocks*/ + while (count < maxblocks && count <= blocks_to_boundary) { + ext4_fsblk_t blk; + + if (!verify_chain(chain, partial)) { + /* + * Indirect block might be removed by + * truncate while we were reading it. + * Handling of that case: forget what we've + * got now. Flag the err as EAGAIN, so it + * will reread. + */ + err = -EAGAIN; + count = 0; + break; + } + blk = le32_to_cpu(*(chain[depth-1].p + count)); + + if (blk == first_block + count) + count++; + else + break; + } + if (err != -EAGAIN) + goto got_it; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if (!create || err == -EIO) + goto cleanup; + + mutex_lock(&ei->truncate_mutex); + + /* + * If the indirect block is missing while we are reading + * the chain(ext4_get_branch() returns -EAGAIN err), or + * if the chain has been changed after we grab the semaphore, + * (either because another process truncated this branch, or + * another get_block allocated this branch) re-grab the chain to see if + * the request block has been allocated or not. + * + * Since we already block the truncate/other get_block + * at this point, we will have the current copy of the chain when we + * splice the branch into the tree. + */ + if (err == -EAGAIN || !verify_chain(chain, partial)) { + while (partial > chain) { + brelse(partial->bh); + partial--; + } + partial = ext4_get_branch(inode, depth, offsets, chain, &err); + if (!partial) { + count++; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + clear_buffer_new(bh_result); + goto got_it; + } + } + + /* + * Okay, we need to do block allocation. Lazily initialize the block + * allocation info here if necessary + */ + if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) + ext4_init_block_alloc_info(inode); + + goal = ext4_find_goal(inode, iblock, chain, partial); + + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; + + /* + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. + */ + count = ext4_blks_to_allocate(partial, indirect_blks, + maxblocks, blocks_to_boundary); + /* + * Block out ext4_truncate while we alter the tree + */ + err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal, + offsets + (partial - chain), partial); + + /* + * The ext4_splice_branch call will free and forget any buffers + * on the new chain if there is a failure, but that risks using + * up transaction credits, especially for bitmaps where the + * credits cannot be returned. Can we handle this somehow? We + * may need to return -EAGAIN upwards in the worst case. --sct + */ + if (!err) + err = ext4_splice_branch(handle, inode, iblock, + partial, indirect_blks, count); + /* + * i_disksize growing is protected by truncate_mutex. Don't forget to + * protect it if you're about to implement concurrent + * ext4_get_block() -bzzz + */ + if (!err && extend_disksize && inode->i_size > ei->i_disksize) + ei->i_disksize = inode->i_size; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + + set_buffer_new(bh_result); +got_it: + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (count > blocks_to_boundary) + set_buffer_boundary(bh_result); + err = count; + /* Clean up and exit */ + partial = chain + depth - 1; /* the whole chain */ +cleanup: + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } + BUFFER_TRACE(bh_result, "returned"); +out: + return err; +} + +#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) + +static int ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + handle_t *handle = journal_current_handle(); + int ret = 0; + unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + + if (!create) + goto get_block; /* A read */ + + if (max_blocks == 1) + goto get_block; /* A single block get */ + + if (handle->h_transaction->t_state == T_LOCKED) { + /* + * Huge direct-io writes can hold off commits for long + * periods of time. Let this commit run. + */ + ext4_journal_stop(handle); + handle = ext4_journal_start(inode, DIO_CREDITS); + if (IS_ERR(handle)) + ret = PTR_ERR(handle); + goto get_block; + } + + if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) { + /* + * Getting low on buffer credits... + */ + ret = ext4_journal_extend(handle, DIO_CREDITS); + if (ret > 0) { + /* + * Couldn't extend the transaction. Start a new one. + */ + ret = ext4_journal_restart(handle, DIO_CREDITS); + } + } + +get_block: + if (ret == 0) { + ret = ext4_get_blocks_wrap(handle, inode, iblock, + max_blocks, bh_result, create, 0); + if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + ret = 0; + } + } + return ret; +} + +/* + * `handle' can be NULL if create is zero + */ +struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, + long block, int create, int *errp) +{ + struct buffer_head dummy; + int fatal = 0, err; + + J_ASSERT(handle != NULL || create == 0); + + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); + err = ext4_get_blocks_wrap(handle, inode, block, 1, + &dummy, create, 1); + /* + * ext4_get_blocks_handle() returns number of blocks + * mapped. 0 in case of a HOLE. + */ + if (err > 0) { + if (err > 1) + WARN_ON(1); + err = 0; + } + *errp = err; + if (!err && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); + if (!bh) { + *errp = -EIO; + goto err; + } + if (buffer_new(&dummy)) { + J_ASSERT(create != 0); + J_ASSERT(handle != 0); + + /* + * Now that we do not always journal data, we should + * keep in mind whether this should always journal the + * new buffer as metadata. For now, regular file + * writes use ext4_get_block instead, so it's not a + * problem. + */ + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + fatal = ext4_journal_get_create_access(handle, bh); + if (!fatal && !buffer_uptodate(bh)) { + memset(bh->b_data,0,inode->i_sb->s_blocksize); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh); + if (!fatal) + fatal = err; + } else { + BUFFER_TRACE(bh, "not a new buffer"); + } + if (fatal) { + *errp = fatal; + brelse(bh); + bh = NULL; + } + return bh; + } +err: + return NULL; +} + +struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, + int block, int create, int *err) +{ + struct buffer_head * bh; + + bh = ext4_getblk(handle, inode, block, create, err); + if (!bh) + return bh; + if (buffer_uptodate(bh)) + return bh; + ll_rw_block(READ_META, 1, &bh); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) + return bh; + put_bh(bh); + *err = -EIO; + return NULL; +} + +static int walk_page_buffers( handle_t *handle, + struct buffer_head *head, + unsigned from, + unsigned to, + int *partial, + int (*fn)( handle_t *handle, + struct buffer_head *bh)) +{ + struct buffer_head *bh; + unsigned block_start, block_end; + unsigned blocksize = head->b_size; + int err, ret = 0; + struct buffer_head *next; + + for ( bh = head, block_start = 0; + ret == 0 && (bh != head || !block_start); + block_start = block_end, bh = next) + { + next = bh->b_this_page; + block_end = block_start + blocksize; + if (block_end <= from || block_start >= to) { + if (partial && !buffer_uptodate(bh)) + *partial = 1; + continue; + } + err = (*fn)(handle, bh); + if (!ret) + ret = err; + } + return ret; +} + +/* + * To preserve ordering, it is essential that the hole instantiation and + * the data write be encapsulated in a single transaction. We cannot + * close off a transaction and start a new one between the ext4_get_block() + * and the commit_write(). So doing the jbd2_journal_start at the start of + * prepare_write() is the right place. + * + * Also, this function can nest inside ext4_writepage() -> + * block_write_full_page(). In that case, we *know* that ext4_writepage() + * has generated enough buffer credits to do the whole page. So we won't + * block on the journal in that case, which is good, because the caller may + * be PF_MEMALLOC. + * + * By accident, ext4 can be reentered when a transaction is open via + * quota file writes. If we were to commit the transaction while thus + * reentered, there can be a deadlock - we would be holding a quota + * lock, and the commit would never complete if another thread had a + * transaction open and was blocking on the quota lock - a ranking + * violation. + * + * So what we do is to rely on the fact that jbd2_journal_stop/journal_start + * will _not_ run commit under these circumstances because handle->h_ref + * is elevated. We'll still have enough credits for the tiny quotafile + * write. + */ +static int do_journal_get_write_access(handle_t *handle, + struct buffer_head *bh) +{ + if (!buffer_mapped(bh) || buffer_freed(bh)) + return 0; + return ext4_journal_get_write_access(handle, bh); +} + +static int ext4_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + int ret, needed_blocks = ext4_writepage_trans_blocks(inode); + handle_t *handle; + int retries = 0; + +retry: + handle = ext4_journal_start(inode, needed_blocks); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) + ret = nobh_prepare_write(page, from, to, ext4_get_block); + else + ret = block_prepare_write(page, from, to, ext4_get_block); + if (ret) + goto prepare_write_failed; + + if (ext4_should_journal_data(inode)) { + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, do_journal_get_write_access); + } +prepare_write_failed: + if (ret) + ext4_journal_stop(handle); + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; +out: + return ret; +} + +int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_dirty_data(handle, bh); + if (err) + ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, + bh, handle,err); + return err; +} + +/* For commit_write() in data=journal mode */ +static int commit_write_fn(handle_t *handle, struct buffer_head *bh) +{ + if (!buffer_mapped(bh) || buffer_freed(bh)) + return 0; + set_buffer_uptodate(bh); + return ext4_journal_dirty_metadata(handle, bh); +} + +/* + * We need to pick up the new inode size which generic_commit_write gave us + * `file' can be NULL - eg, when called from page_symlink(). + * + * ext4 never places buffers on inode->i_mapping->private_list. metadata + * buffers are managed internally. + */ +static int ext4_ordered_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + handle_t *handle = ext4_journal_current_handle(); + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, ext4_journal_dirty_data); + + if (ret == 0) { + /* + * generic_commit_write() will run mark_inode_dirty() if i_size + * changes. So let's piggyback the i_disksize mark_inode_dirty + * into that. + */ + loff_t new_i_size; + + new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + if (new_i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = new_i_size; + ret = generic_commit_write(file, page, from, to); + } + ret2 = ext4_journal_stop(handle); + if (!ret) + ret = ret2; + return ret; +} + +static int ext4_writeback_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + handle_t *handle = ext4_journal_current_handle(); + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + loff_t new_i_size; + + new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + if (new_i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = new_i_size; + + if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) + ret = nobh_commit_write(file, page, from, to); + else + ret = generic_commit_write(file, page, from, to); + + ret2 = ext4_journal_stop(handle); + if (!ret) + ret = ret2; + return ret; +} + +static int ext4_journalled_commit_write(struct file *file, + struct page *page, unsigned from, unsigned to) +{ + handle_t *handle = ext4_journal_current_handle(); + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + int partial = 0; + loff_t pos; + + /* + * Here we duplicate the generic_commit_write() functionality + */ + pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + ret = walk_page_buffers(handle, page_buffers(page), from, + to, &partial, commit_write_fn); + if (!partial) + SetPageUptodate(page); + if (pos > inode->i_size) + i_size_write(inode, pos); + EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; + if (inode->i_size > EXT4_I(inode)->i_disksize) { + EXT4_I(inode)->i_disksize = inode->i_size; + ret2 = ext4_mark_inode_dirty(handle, inode); + if (!ret) + ret = ret2; + } + ret2 = ext4_journal_stop(handle); + if (!ret) + ret = ret2; + return ret; +} + +/* + * bmap() is special. It gets used by applications such as lilo and by + * the swapper to find the on-disk block of a specific piece of data. + * + * Naturally, this is dangerous if the block concerned is still in the + * journal. If somebody makes a swapfile on an ext4 data-journaling + * filesystem and enables swap, then they may get a nasty shock when the + * data getting swapped to that swapfile suddenly gets overwritten by + * the original zero's written out previously to the journal and + * awaiting writeback in the kernel's buffer cache. + * + * So, if we see any bmap calls here on a modified, data-journaled file, + * take extra steps to flush any blocks which might be in the cache. + */ +static sector_t ext4_bmap(struct address_space *mapping, sector_t block) +{ + struct inode *inode = mapping->host; + journal_t *journal; + int err; + + if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { + /* + * This is a REALLY heavyweight approach, but the use of + * bmap on dirty files is expected to be extremely rare: + * only if we run lilo or swapon on a freshly made file + * do we expect this to happen. + * + * (bmap requires CAP_SYS_RAWIO so this does not + * represent an unprivileged user DOS attack --- we'd be + * in trouble if mortal users could trigger this path at + * will.) + * + * NB. EXT4_STATE_JDATA is not set on files other than + * regular files. If somebody wants to bmap a directory + * or symlink and gets confused because the buffer + * hasn't yet been flushed to disk, they deserve + * everything they get. + */ + + EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; + journal = EXT4_JOURNAL(inode); + jbd2_journal_lock_updates(journal); + err = jbd2_journal_flush(journal); + jbd2_journal_unlock_updates(journal); + + if (err) + return 0; + } + + return generic_block_bmap(mapping,block,ext4_get_block); +} + +static int bget_one(handle_t *handle, struct buffer_head *bh) +{ + get_bh(bh); + return 0; +} + +static int bput_one(handle_t *handle, struct buffer_head *bh) +{ + put_bh(bh); + return 0; +} + +static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) +{ + if (buffer_mapped(bh)) + return ext4_journal_dirty_data(handle, bh); + return 0; +} + +/* + * Note that we always start a transaction even if we're not journalling + * data. This is to preserve ordering: any hole instantiation within + * __block_write_full_page -> ext4_get_block() should be journalled + * along with the data so we don't crash and then get metadata which + * refers to old data. + * + * In all journalling modes block_write_full_page() will start the I/O. + * + * Problem: + * + * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> + * ext4_writepage() + * + * Similar for: + * + * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... + * + * Same applies to ext4_get_block(). We will deadlock on various things like + * lock_journal and i_truncate_mutex. + * + * Setting PF_MEMALLOC here doesn't work - too many internal memory + * allocations fail. + * + * 16May01: If we're reentered then journal_current_handle() will be + * non-zero. We simply *return*. + * + * 1 July 2001: @@@ FIXME: + * In journalled data mode, a data buffer may be metadata against the + * current transaction. But the same file is part of a shared mapping + * and someone does a writepage() on it. + * + * We will move the buffer onto the async_data list, but *after* it has + * been dirtied. So there's a small window where we have dirty data on + * BJ_Metadata. + * + * Note that this only applies to the last partial page in the file. The + * bit which block_write_full_page() uses prepare/commit for. (That's + * broken code anyway: it's wrong for msync()). + * + * It's a rare case: affects the final partial page, for journalled data + * where the file is subject to bith write() and writepage() in the same + * transction. To fix it we'll need a custom block_write_full_page(). + * We'll probably need that anyway for journalling writepage() output. + * + * We don't honour synchronous mounts for writepage(). That would be + * disastrous. Any write() or metadata operation will sync the fs for + * us. + * + * AKPM2: if all the page's buffers are mapped to disk and !data=journal, + * we don't need to open a transaction here. + */ +static int ext4_ordered_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *page_bufs; + handle_t *handle = NULL; + int ret = 0; + int err; + + J_ASSERT(PageLocked(page)); + + /* + * We give up here if we're reentered, because it might be for a + * different filesystem. + */ + if (ext4_journal_current_handle()) + goto out_fail; + + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); + + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_fail; + } + + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + page_bufs = page_buffers(page); + walk_page_buffers(handle, page_bufs, 0, + PAGE_CACHE_SIZE, NULL, bget_one); + + ret = block_write_full_page(page, ext4_get_block, wbc); + + /* + * The page can become unlocked at any point now, and + * truncate can then come in and change things. So we + * can't touch *page from now on. But *page_bufs is + * safe due to elevated refcount. + */ + + /* + * And attach them to the current transaction. But only if + * block_write_full_page() succeeded. Otherwise they are unmapped, + * and generally junk. + */ + if (ret == 0) { + err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, + NULL, jbd2_journal_dirty_data_fn); + if (!ret) + ret = err; + } + walk_page_buffers(handle, page_bufs, 0, + PAGE_CACHE_SIZE, NULL, bput_one); + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; + +out_fail: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return ret; +} + +static int ext4_writeback_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + handle_t *handle = NULL; + int ret = 0; + int err; + + if (ext4_journal_current_handle()) + goto out_fail; + + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_fail; + } + + if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) + ret = nobh_writepage(page, ext4_get_block, wbc); + else + ret = block_write_full_page(page, ext4_get_block, wbc); + + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; + +out_fail: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return ret; +} + +static int ext4_journalled_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + handle_t *handle = NULL; + int ret = 0; + int err; + + if (ext4_journal_current_handle()) + goto no_write; + + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto no_write; + } + + if (!page_has_buffers(page) || PageChecked(page)) { + /* + * It's mmapped pagecache. Add buffers and journal it. There + * doesn't seem much point in redirtying the page here. + */ + ClearPageChecked(page); + ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, + ext4_get_block); + if (ret != 0) { + ext4_journal_stop(handle); + goto out_unlock; + } + ret = walk_page_buffers(handle, page_buffers(page), 0, + PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); + + err = walk_page_buffers(handle, page_buffers(page), 0, + PAGE_CACHE_SIZE, NULL, commit_write_fn); + if (ret == 0) + ret = err; + EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; + unlock_page(page); + } else { + /* + * It may be a page full of checkpoint-mode buffers. We don't + * really know unless we go poke around in the buffer_heads. + * But block_write_full_page will do the right thing. + */ + ret = block_write_full_page(page, ext4_get_block, wbc); + } + err = ext4_journal_stop(handle); + if (!ret) + ret = err; +out: + return ret; + +no_write: + redirty_page_for_writepage(wbc, page); +out_unlock: + unlock_page(page); + goto out; +} + +static int ext4_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, ext4_get_block); +} + +static int +ext4_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); +} + +static void ext4_invalidatepage(struct page *page, unsigned long offset) +{ + journal_t *journal = EXT4_JOURNAL(page->mapping->host); + + /* + * If it's a full truncate we just forget about the pending dirtying + */ + if (offset == 0) + ClearPageChecked(page); + + jbd2_journal_invalidatepage(journal, page, offset); +} + +static int ext4_releasepage(struct page *page, gfp_t wait) +{ + journal_t *journal = EXT4_JOURNAL(page->mapping->host); + + WARN_ON(PageChecked(page)); + if (!page_has_buffers(page)) + return 0; + return jbd2_journal_try_to_free_buffers(journal, page, wait); +} + +/* + * If the O_DIRECT write will extend the file then add this inode to the + * orphan list. So recovery will truncate it back to the original size + * if the machine crashes during the write. + * + * If the O_DIRECT write is intantiating holes inside i_size and the machine + * crashes then stale disk data _may_ be exposed inside the file. + */ +static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct ext4_inode_info *ei = EXT4_I(inode); + handle_t *handle = NULL; + ssize_t ret; + int orphan = 0; + size_t count = iov_length(iov, nr_segs); + + if (rw == WRITE) { + loff_t final_size = offset + count; + + handle = ext4_journal_start(inode, DIO_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + if (final_size > inode->i_size) { + ret = ext4_orphan_add(handle, inode); + if (ret) + goto out_stop; + orphan = 1; + ei->i_disksize = inode->i_size; + } + } + + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL); + + /* + * Reacquire the handle: ext4_get_block() can restart the transaction + */ + handle = journal_current_handle(); + +out_stop: + if (handle) { + int err; + + if (orphan && inode->i_nlink) + ext4_orphan_del(handle, inode); + if (orphan && ret > 0) { + loff_t end = offset + ret; + if (end > inode->i_size) { + ei->i_disksize = end; + i_size_write(inode, end); + /* + * We're going to return a positive `ret' + * here due to non-zero-length I/O, so there's + * no way of reporting error returns from + * ext4_mark_inode_dirty() to userspace. So + * ignore it. + */ + ext4_mark_inode_dirty(handle, inode); + } + } + err = ext4_journal_stop(handle); + if (ret == 0) + ret = err; + } +out: + return ret; +} + +/* + * Pages can be marked dirty completely asynchronously from ext4's journalling + * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do + * much here because ->set_page_dirty is called under VFS locks. The page is + * not necessarily locked. + * + * We cannot just dirty the page and leave attached buffers clean, because the + * buffers' dirty state is "definitive". We cannot just set the buffers dirty + * or jbddirty because all the journalling code will explode. + * + * So what we do is to mark the page "pending dirty" and next time writepage + * is called, propagate that into the buffers appropriately. + */ +static int ext4_journalled_set_page_dirty(struct page *page) +{ + SetPageChecked(page); + return __set_page_dirty_nobuffers(page); +} + +static const struct address_space_operations ext4_ordered_aops = { + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_ordered_writepage, + .sync_page = block_sync_page, + .prepare_write = ext4_prepare_write, + .commit_write = ext4_ordered_commit_write, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, +}; + +static const struct address_space_operations ext4_writeback_aops = { + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_writeback_writepage, + .sync_page = block_sync_page, + .prepare_write = ext4_prepare_write, + .commit_write = ext4_writeback_commit_write, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, +}; + +static const struct address_space_operations ext4_journalled_aops = { + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_journalled_writepage, + .sync_page = block_sync_page, + .prepare_write = ext4_prepare_write, + .commit_write = ext4_journalled_commit_write, + .set_page_dirty = ext4_journalled_set_page_dirty, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, +}; + +void ext4_set_aops(struct inode *inode) +{ + if (ext4_should_order_data(inode)) + inode->i_mapping->a_ops = &ext4_ordered_aops; + else if (ext4_should_writeback_data(inode)) + inode->i_mapping->a_ops = &ext4_writeback_aops; + else + inode->i_mapping->a_ops = &ext4_journalled_aops; +} + +/* + * ext4_block_truncate_page() zeroes out a mapping from file offset `from' + * up to the end of the block which corresponds to `from'. + * This required during truncate. We need to physically zero the tail end + * of that block so it doesn't yield old data if the file is later grown. + */ +int ext4_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from) +{ + ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned blocksize, iblock, length, pos; + struct inode *inode = mapping->host; + struct buffer_head *bh; + int err = 0; + void *kaddr; + + blocksize = inode->i_sb->s_blocksize; + length = blocksize - (offset & (blocksize - 1)); + iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + + /* + * For "nobh" option, we can only work if we don't need to + * read-in the page - otherwise we create buffers to do the IO. + */ + if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && + ext4_should_writeback_data(inode) && PageUptodate(page)) { + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, length); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + set_page_dirty(page); + goto unlock; + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + + /* Find the buffer that contains "offset" */ + bh = page_buffers(page); + pos = blocksize; + while (offset >= pos) { + bh = bh->b_this_page; + iblock++; + pos += blocksize; + } + + err = 0; + if (buffer_freed(bh)) { + BUFFER_TRACE(bh, "freed: skip"); + goto unlock; + } + + if (!buffer_mapped(bh)) { + BUFFER_TRACE(bh, "unmapped"); + ext4_get_block(inode, iblock, bh, 0); + /* unmapped? It's a hole - nothing to do */ + if (!buffer_mapped(bh)) { + BUFFER_TRACE(bh, "still unmapped"); + goto unlock; + } + } + + /* Ok, it's mapped. Make sure it's up-to-date */ + if (PageUptodate(page)) + set_buffer_uptodate(bh); + + if (!buffer_uptodate(bh)) { + err = -EIO; + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + /* Uhhuh. Read error. Complain and punt. */ + if (!buffer_uptodate(bh)) + goto unlock; + } + + if (ext4_should_journal_data(inode)) { + BUFFER_TRACE(bh, "get write access"); + err = ext4_journal_get_write_access(handle, bh); + if (err) + goto unlock; + } + + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, length); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + + BUFFER_TRACE(bh, "zeroed end of block"); + + err = 0; + if (ext4_should_journal_data(inode)) { + err = ext4_journal_dirty_metadata(handle, bh); + } else { + if (ext4_should_order_data(inode)) + err = ext4_journal_dirty_data(handle, bh); + mark_buffer_dirty(bh); + } + +unlock: + unlock_page(page); + page_cache_release(page); + return err; +} + +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(__le32 *p, __le32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext4_find_shared - find the indirect blocks for partial truncation. + * @inode: inode in question + * @depth: depth of the affected branch + * @offsets: offsets of pointers in that branch (see ext4_block_to_path) + * @chain: place to store the pointers to partial indirect blocks + * @top: place to the (detached) top of branch + * + * This is a helper function used by ext4_truncate(). + * + * When we do truncate() we may have to clean the ends of several + * indirect blocks but leave the blocks themselves alive. Block is + * partially truncated if some data below the new i_size is refered + * from it (and it is on the path to the first completely truncated + * data block, indeed). We have to free the top of that path along + * with everything to the right of the path. Since no allocation + * past the truncation point is possible until ext4_truncate() + * finishes, we may safely do the latter, but top of branch may + * require special attention - pageout below the truncation point + * might try to populate it. + * + * We atomically detach the top of branch from the tree, store the + * block number of its root in *@top, pointers to buffer_heads of + * partially truncated blocks - in @chain[].bh and pointers to + * their last elements that should not be removed - in + * @chain[].p. Return value is the pointer to last filled element + * of @chain. + * + * The work left to caller to do the actual freeing of subtrees: + * a) free the subtree starting from *@top + * b) free the subtrees whose roots are stored in + * (@chain[i].p+1 .. end of @chain[i].bh->b_data) + * c) free the subtrees growing from the inode past the @chain[0]. + * (no partially truncated stuff there). */ + +static Indirect *ext4_find_shared(struct inode *inode, int depth, + int offsets[4], Indirect chain[4], __le32 *top) +{ + Indirect *partial, *p; + int k, err; + + *top = 0; + /* Make k index the deepest non-null offest + 1 */ + for (k = depth; k > 1 && !offsets[k-1]; k--) + ; + partial = ext4_get_branch(inode, k, offsets, chain, &err); + /* Writer: pointers */ + if (!partial) + partial = chain + k-1; + /* + * If the branch acquired continuation since we've looked at it - + * fine, it should all survive and (new) top doesn't belong to us. + */ + if (!partial->key && *partial->p) + /* Writer: end */ + goto no_top; + for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our + * branch should be detached before unlocking. However, if that rest + * of branch is all ours and does not grow immediately from the inode + * it's easier to cheat and just decrement partial->p. + */ + if (p == chain + k - 1 && p > chain) { + p->p--; + } else { + *top = *p->p; + /* Nope, don't do this in ext4. Must leave the tree intact */ +#if 0 + *p->p = 0; +#endif + } + /* Writer: end */ + + while(partial > p) { + brelse(partial->bh); + partial--; + } +no_top: + return partial; +} + +/* + * Zero a number of block pointers in either an inode or an indirect block. + * If we restart the transaction we must again get write access to the + * indirect block for further modification. + * + * We release `count' blocks on disk, but (last - first) may be greater + * than `count' because there can be holes in there. + */ +static void ext4_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block_to_free, + unsigned long count, __le32 *first, __le32 *last) +{ + __le32 *p; + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, bh); + } + ext4_mark_inode_dirty(handle, inode); + ext4_journal_test_restart(handle, inode); + if (bh) { + BUFFER_TRACE(bh, "retaking write access"); + ext4_journal_get_write_access(handle, bh); + } + } + + /* + * Any buffers which are on the journal will be in memory. We find + * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() + * on them. We've already detached each block from the file, so + * bforget() in jbd2_journal_forget() should be safe. + * + * AKPM: turn on bforget in jbd2_journal_forget()!!! + */ + for (p = first; p < last; p++) { + u32 nr = le32_to_cpu(*p); + if (nr) { + struct buffer_head *bh; + + *p = 0; + bh = sb_find_get_block(inode->i_sb, nr); + ext4_forget(handle, 0, inode, bh, nr); + } + } + + ext4_free_blocks(handle, inode, block_to_free, count); +} + +/** + * ext4_free_data - free a list of data blocks + * @handle: handle for this transaction + * @inode: inode we are dealing with + * @this_bh: indirect buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: points immediately past the end of array + * + * We are freeing all blocks refered from that array (numbers are stored as + * little-endian 32-bit) and updating @inode->i_blocks appropriately. + * + * We accumulate contiguous runs of blocks to free. Conveniently, if these + * blocks are contiguous then releasing them at one time will only affect one + * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't + * actually use a lot of journal space. + * + * @this_bh will be %NULL if @first and @last point into the inode's direct + * block pointers. + */ +static void ext4_free_data(handle_t *handle, struct inode *inode, + struct buffer_head *this_bh, + __le32 *first, __le32 *last) +{ + ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ + unsigned long count = 0; /* Number of blocks in the run */ + __le32 *block_to_free_p = NULL; /* Pointer into inode/ind + corresponding to + block_to_free */ + ext4_fsblk_t nr; /* Current block # */ + __le32 *p; /* Pointer into inode/ind + for current block */ + int err; + + if (this_bh) { /* For indirect block */ + BUFFER_TRACE(this_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, this_bh); + /* Important: if we can't update the indirect pointers + * to the blocks, we can't free them. */ + if (err) + return; + } + + for (p = first; p < last; p++) { + nr = le32_to_cpu(*p); + if (nr) { + /* accumulate blocks to free if they're contiguous */ + if (count == 0) { + block_to_free = nr; + block_to_free_p = p; + count = 1; + } else if (nr == block_to_free + count) { + count++; + } else { + ext4_clear_blocks(handle, inode, this_bh, + block_to_free, + count, block_to_free_p, p); + block_to_free = nr; + block_to_free_p = p; + count = 1; + } + } + } + + if (count > 0) + ext4_clear_blocks(handle, inode, this_bh, block_to_free, + count, block_to_free_p, p); + + if (this_bh) { + BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, this_bh); + } +} + +/** + * ext4_free_branches - free an array of branches + * @handle: JBD handle for this transaction + * @inode: inode we are dealing with + * @parent_bh: the buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks refered from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static void ext4_free_branches(handle_t *handle, struct inode *inode, + struct buffer_head *parent_bh, + __le32 *first, __le32 *last, int depth) +{ + ext4_fsblk_t nr; + __le32 *p; + + if (is_handle_aborted(handle)) + return; + + if (depth--) { + struct buffer_head *bh; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + p = last; + while (--p >= first) { + nr = le32_to_cpu(*p); + if (!nr) + continue; /* A hole */ + + /* Go read the buffer for the next level down */ + bh = sb_bread(inode->i_sb, nr); + + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + ext4_error(inode->i_sb, "ext4_free_branches", + "Read failure, inode=%lu, block=%llu", + inode->i_ino, nr); + continue; + } + + /* This zaps the entire block. Bottom up. */ + BUFFER_TRACE(bh, "free child branches"); + ext4_free_branches(handle, inode, bh, + (__le32*)bh->b_data, + (__le32*)bh->b_data + addr_per_block, + depth); + + /* + * We've probably journalled the indirect block several + * times during the truncate. But it's no longer + * needed and we now drop it from the transaction via + * jbd2_journal_revoke(). + * + * That's easy if it's exclusively part of this + * transaction. But if it's part of the committing + * transaction then jbd2_journal_forget() will simply + * brelse() it. That means that if the underlying + * block is reallocated in ext4_get_block(), + * unmap_underlying_metadata() will find this block + * and will try to get rid of it. damn, damn. + * + * If this block has already been committed to the + * journal, a revoke record will be written. And + * revoke records must be emitted *before* clearing + * this block's bit in the bitmaps. + */ + ext4_forget(handle, 1, inode, bh, bh->b_blocknr); + + /* + * Everything below this this pointer has been + * released. Now let this top-of-subtree go. + * + * We want the freeing of this indirect block to be + * atomic in the journal with the updating of the + * bitmap block which owns it. So make some room in + * the journal. + * + * We zero the parent pointer *after* freeing its + * pointee in the bitmaps, so if extend_transaction() + * for some reason fails to put the bitmap changes and + * the release into the same transaction, recovery + * will merely complain about releasing a free block, + * rather than leaking blocks. + */ + if (is_handle_aborted(handle)) + return; + if (try_to_extend_transaction(handle, inode)) { + ext4_mark_inode_dirty(handle, inode); + ext4_journal_test_restart(handle, inode); + } + + ext4_free_blocks(handle, inode, nr, 1); + + if (parent_bh) { + /* + * The block which we have just freed is + * pointed to by an indirect block: journal it + */ + BUFFER_TRACE(parent_bh, "get_write_access"); + if (!ext4_journal_get_write_access(handle, + parent_bh)){ + *p = 0; + BUFFER_TRACE(parent_bh, + "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, + parent_bh); + } + } + } + } else { + /* We have reached the bottom of the tree. */ + BUFFER_TRACE(parent_bh, "free data blocks"); + ext4_free_data(handle, inode, parent_bh, first, last); + } +} + +/* + * ext4_truncate() + * + * We block out ext4_get_block() block instantiations across the entire + * transaction, and VFS/VM ensures that ext4_truncate() cannot run + * simultaneously on behalf of the same inode. + * + * As we work through the truncate and commmit bits of it to the journal there + * is one core, guiding principle: the file's tree must always be consistent on + * disk. We must be able to restart the truncate after a crash. + * + * The file's tree may be transiently inconsistent in memory (although it + * probably isn't), but whenever we close off and commit a journal transaction, + * the contents of (the filesystem + the journal) must be consistent and + * restartable. It's pretty simple, really: bottom up, right to left (although + * left-to-right works OK too). + * + * Note that at recovery time, journal replay occurs *before* the restart of + * truncate against the orphan inode list. + * + * The committed inode has the new, desired i_size (which is the same as + * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see + * that this inode's truncate did not complete and it will again call + * ext4_truncate() to have another go. So there will be instantiated blocks + * to the right of the truncation point in a crashed ext4 filesystem. But + * that's fine - as long as they are linked from the inode, the post-crash + * ext4_truncate() run will find them and release them. + */ +void ext4_truncate(struct inode *inode) +{ + handle_t *handle; + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *i_data = ei->i_data; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + __le32 nr = 0; + int n; + long last_block; + unsigned blocksize = inode->i_sb->s_blocksize; + struct page *page; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (ext4_inode_is_fast_symlink(inode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + /* + * We have to lock the EOF page here, because lock_page() nests + * outside jbd2_journal_start(). + */ + if ((inode->i_size & (blocksize - 1)) == 0) { + /* Block boundary? Nothing to do */ + page = NULL; + } else { + page = grab_cache_page(mapping, + inode->i_size >> PAGE_CACHE_SHIFT); + if (!page) + return; + } + + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_truncate(inode, page); + + handle = start_transaction(inode); + if (IS_ERR(handle)) { + if (page) { + clear_highpage(page); + flush_dcache_page(page); + unlock_page(page); + page_cache_release(page); + } + return; /* AKPM: return what? */ + } + + last_block = (inode->i_size + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + + if (page) + ext4_block_truncate_page(handle, page, mapping, inode->i_size); + + n = ext4_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + + /* + * OK. This truncate is going to happen. We add the inode to the + * orphan list, so that if this truncate spans multiple transactions, + * and we crash, we will resume the truncate when the filesystem + * recovers. It also marks the inode dirty, to catch the new size. + * + * Implication: the file must always be in a sane, consistent + * truncatable state while each transaction commits. + */ + if (ext4_orphan_add(handle, inode)) + goto out_stop; + + /* + * The orphan list entry will now protect us from any crash which + * occurs before the truncate completes, so it is now safe to propagate + * the new, shorter inode size (held for now in i_size) into the + * on-disk inode. We do this via i_disksize, which is the value which + * ext4 *really* writes onto the disk inode. + */ + ei->i_disksize = inode->i_size; + + /* + * From here we block out all ext4_get_block() callers who want to + * modify the block allocation tree. + */ + mutex_lock(&ei->truncate_mutex); + + if (n == 1) { /* direct blocks */ + ext4_free_data(handle, inode, NULL, i_data+offsets[0], + i_data + EXT4_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext4_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (not detached) */ + if (nr) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext4_free_branches(handle, inode, NULL, + &nr, &nr+1, (chain+n-1) - partial); + *partial->p = 0; + /* + * We mark the inode dirty prior to restart, + * and prior to stop. No need for it here. + */ + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext4_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, (chain+n-1) - partial); + } + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext4_free_branches(handle, inode, partial->bh, partial->p + 1, + (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + BUFFER_TRACE(partial->bh, "call brelse"); + brelse (partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT4_IND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT4_IND_BLOCK] = 0; + } + case EXT4_IND_BLOCK: + nr = i_data[EXT4_DIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT4_DIND_BLOCK] = 0; + } + case EXT4_DIND_BLOCK: + nr = i_data[EXT4_TIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT4_TIND_BLOCK] = 0; + } + case EXT4_TIND_BLOCK: + ; + } + + ext4_discard_reservation(inode); + + mutex_unlock(&ei->truncate_mutex); + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + ext4_mark_inode_dirty(handle, inode); + + /* + * In a multi-transaction truncate, we only make the final transaction + * synchronous + */ + if (IS_SYNC(inode)) + handle->h_sync = 1; +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext4_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + + ext4_journal_stop(handle); +} + +static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, + unsigned long ino, struct ext4_iloc *iloc) +{ + unsigned long desc, group_desc, block_group; + unsigned long offset; + ext4_fsblk_t block; + struct buffer_head *bh; + struct ext4_group_desc * gdp; + + if (!ext4_valid_inum(sb, ino)) { + /* + * This error is already checked for in namei.c unless we are + * looking at an NFS filehandle, in which case no error + * report is needed + */ + return 0; + } + + block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + if (block_group >= EXT4_SB(sb)->s_groups_count) { + ext4_error(sb,"ext4_get_inode_block","group >= groups count"); + return 0; + } + smp_rmb(); + group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); + desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); + bh = EXT4_SB(sb)->s_group_desc[group_desc]; + if (!bh) { + ext4_error (sb, "ext4_get_inode_block", + "Descriptor not loaded"); + return 0; + } + + gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data + + desc * EXT4_DESC_SIZE(sb)); + /* + * Figure out the offset within the block group inode table + */ + offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * + EXT4_INODE_SIZE(sb); + block = ext4_inode_table(sb, gdp) + + (offset >> EXT4_BLOCK_SIZE_BITS(sb)); + + iloc->block_group = block_group; + iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); + return block; +} + +/* + * ext4_get_inode_loc returns with an extra refcount against the inode's + * underlying buffer_head on success. If 'in_mem' is true, we have all + * data in memory that is needed to recreate the on-disk version of this + * inode. + */ +static int __ext4_get_inode_loc(struct inode *inode, + struct ext4_iloc *iloc, int in_mem) +{ + ext4_fsblk_t block; + struct buffer_head *bh; + + block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); + if (!block) + return -EIO; + + bh = sb_getblk(inode->i_sb, block); + if (!bh) { + ext4_error (inode->i_sb, "ext4_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%llu", + inode->i_ino, block); + return -EIO; + } + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (buffer_uptodate(bh)) { + /* someone brought it uptodate while we waited */ + unlock_buffer(bh); + goto has_buffer; + } + + /* + * If we have all information of the inode in memory and this + * is the only valid inode in the block, we need not read the + * block. + */ + if (in_mem) { + struct buffer_head *bitmap_bh; + struct ext4_group_desc *desc; + int inodes_per_buffer; + int inode_offset, i; + int block_group; + int start; + + block_group = (inode->i_ino - 1) / + EXT4_INODES_PER_GROUP(inode->i_sb); + inodes_per_buffer = bh->b_size / + EXT4_INODE_SIZE(inode->i_sb); + inode_offset = ((inode->i_ino - 1) % + EXT4_INODES_PER_GROUP(inode->i_sb)); + start = inode_offset & ~(inodes_per_buffer - 1); + + /* Is the inode bitmap in cache? */ + desc = ext4_get_group_desc(inode->i_sb, + block_group, NULL); + if (!desc) + goto make_io; + + bitmap_bh = sb_getblk(inode->i_sb, + ext4_inode_bitmap(inode->i_sb, desc)); + if (!bitmap_bh) + goto make_io; + + /* + * If the inode bitmap isn't in cache then the + * optimisation may end up performing two reads instead + * of one, so skip it. + */ + if (!buffer_uptodate(bitmap_bh)) { + brelse(bitmap_bh); + goto make_io; + } + for (i = start; i < start + inodes_per_buffer; i++) { + if (i == inode_offset) + continue; + if (ext4_test_bit(i, bitmap_bh->b_data)) + break; + } + brelse(bitmap_bh); + if (i == start + inodes_per_buffer) { + /* all other inodes are free, so skip I/O */ + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + unlock_buffer(bh); + goto has_buffer; + } + } + +make_io: + /* + * There are other valid inodes in the buffer, this inode + * has in-inode xattrs, or we don't have this inode in memory. + * Read the block from disk. + */ + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ_META, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_error(inode->i_sb, "ext4_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%llu", + inode->i_ino, block); + brelse(bh); + return -EIO; + } + } +has_buffer: + iloc->bh = bh; + return 0; +} + +int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) +{ + /* We have all inode data except xattrs in memory here. */ + return __ext4_get_inode_loc(inode, iloc, + !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); +} + +void ext4_set_inode_flags(struct inode *inode) +{ + unsigned int flags = EXT4_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + if (flags & EXT4_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT4_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & EXT4_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & EXT4_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT4_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +void ext4_read_inode(struct inode * inode) +{ + struct ext4_iloc iloc; + struct ext4_inode *raw_inode; + struct ext4_inode_info *ei = EXT4_I(inode); + struct buffer_head *bh; + int block; + +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + ei->i_acl = EXT4_ACL_NOT_CACHED; + ei->i_default_acl = EXT4_ACL_NOT_CACHED; +#endif + ei->i_block_alloc_info = NULL; + + if (__ext4_get_inode_loc(inode, &iloc, 0)) + goto bad_inode; + bh = iloc.bh; + raw_inode = ext4_raw_inode(&iloc); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if(!(test_opt (inode->i_sb, NO_UID32))) { + inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); + inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; + + ei->i_state = 0; + ei->i_dir_start_lookup = 0; + ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); + /* We now have enough fields to check if the inode was active or not. + * This is needed because nfsd might try to access dead inodes + * the test is that same one that e2fsck uses + * NeilBrown 1999oct15 + */ + if (inode->i_nlink == 0) { + if (inode->i_mode == 0 || + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { + /* this inode is deleted */ + brelse (bh); + goto bad_inode; + } + /* The only unlinked inodes we let through here have + * valid i_mode and are being read by the orphan + * recovery code: that's fine, we're about to complete + * the process of deleting those. */ + } + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); + ei->i_flags = le32_to_cpu(raw_inode->i_flags); +#ifdef EXT4_FRAGMENTS + ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); + ei->i_frag_no = raw_inode->i_frag; + ei->i_frag_size = raw_inode->i_fsize; +#endif + ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != + cpu_to_le32(EXT4_OS_HURD)) + ei->i_file_acl |= + ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; + if (!S_ISREG(inode->i_mode)) { + ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + } else { + inode->i_size |= + ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; + } + ei->i_disksize = inode->i_size; + inode->i_generation = le32_to_cpu(raw_inode->i_generation); + ei->i_block_group = iloc.block_group; + /* + * NOTE! The in-memory inode i_data array is in little-endian order + * even on big-endian machines: we do NOT byteswap the block numbers! + */ + for (block = 0; block < EXT4_N_BLOCKS; block++) + ei->i_data[block] = raw_inode->i_block[block]; + INIT_LIST_HEAD(&ei->i_orphan); + + if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 && + EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { + /* + * When mke2fs creates big inodes it does not zero out + * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE, + * so ignore those first few inodes. + */ + ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); + if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > + EXT4_INODE_SIZE(inode->i_sb)) + goto bad_inode; + if (ei->i_extra_isize == 0) { + /* The extra space is currently unused. Use it. */ + ei->i_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + } else { + __le32 *magic = (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + + ei->i_extra_isize; + if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) + ei->i_state |= EXT4_STATE_XATTR; + } + } else + ei->i_extra_isize = 0; + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext4_file_inode_operations; + inode->i_fop = &ext4_file_operations; + ext4_set_aops(inode); + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ext4_dir_inode_operations; + inode->i_fop = &ext4_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { + if (ext4_inode_is_fast_symlink(inode)) + inode->i_op = &ext4_fast_symlink_inode_operations; + else { + inode->i_op = &ext4_symlink_inode_operations; + ext4_set_aops(inode); + } + } else { + inode->i_op = &ext4_special_inode_operations; + if (raw_inode->i_block[0]) + init_special_inode(inode, inode->i_mode, + old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); + else + init_special_inode(inode, inode->i_mode, + new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); + } + brelse (iloc.bh); + ext4_set_inode_flags(inode); + return; + +bad_inode: + make_bad_inode(inode); + return; +} + +/* + * Post the struct inode info into an on-disk inode location in the + * buffer-cache. This gobbles the caller's reference to the + * buffer_head in the inode location struct. + * + * The caller must have write access to iloc->bh. + */ +static int ext4_do_update_inode(handle_t *handle, + struct inode *inode, + struct ext4_iloc *iloc) +{ + struct ext4_inode *raw_inode = ext4_raw_inode(iloc); + struct ext4_inode_info *ei = EXT4_I(inode); + struct buffer_head *bh = iloc->bh; + int err = 0, rc, block; + + /* For fields not not tracking in the in-memory inode, + * initialise them to zero for new inodes. */ + if (ei->i_state & EXT4_STATE_NEW) + memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if(!(test_opt(inode->i_sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); +/* + * Fix up interoperability with old kernels. Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if(!ei->i_dtime) { + raw_inode->i_uid_high = + cpu_to_le16(high_16_bits(inode->i_uid)); + raw_inode->i_gid_high = + cpu_to_le16(high_16_bits(inode->i_gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = + cpu_to_le16(fs_high2lowuid(inode->i_uid)); + raw_inode->i_gid_low = + cpu_to_le16(fs_high2lowgid(inode->i_gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(ei->i_disksize); + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); + raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); + raw_inode->i_flags = cpu_to_le32(ei->i_flags); +#ifdef EXT4_FRAGMENTS + raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); + raw_inode->i_frag = ei->i_frag_no; + raw_inode->i_fsize = ei->i_frag_size; +#endif + if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != + cpu_to_le32(EXT4_OS_HURD)) + raw_inode->i_file_acl_high = + cpu_to_le16(ei->i_file_acl >> 32); + raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); + if (!S_ISREG(inode->i_mode)) { + raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); + } else { + raw_inode->i_size_high = + cpu_to_le32(ei->i_disksize >> 32); + if (ei->i_disksize > 0x7fffffffULL) { + struct super_block *sb = inode->i_sb; + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || + EXT4_SB(sb)->s_es->s_rev_level == + cpu_to_le32(EXT4_GOOD_OLD_REV)) { + /* If this is the first large file + * created, add a flag to the superblock. + */ + err = ext4_journal_get_write_access(handle, + EXT4_SB(sb)->s_sbh); + if (err) + goto out_brelse; + ext4_update_dynamic_rev(sb); + EXT4_SET_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE); + sb->s_dirt = 1; + handle->h_sync = 1; + err = ext4_journal_dirty_metadata(handle, + EXT4_SB(sb)->s_sbh); + } + } + } + raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + raw_inode->i_block[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + raw_inode->i_block[1] = 0; + } else { + raw_inode->i_block[0] = 0; + raw_inode->i_block[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + raw_inode->i_block[2] = 0; + } + } else for (block = 0; block < EXT4_N_BLOCKS; block++) + raw_inode->i_block[block] = ei->i_data[block]; + + if (ei->i_extra_isize) + raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); + + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + rc = ext4_journal_dirty_metadata(handle, bh); + if (!err) + err = rc; + ei->i_state &= ~EXT4_STATE_NEW; + +out_brelse: + brelse (bh); + ext4_std_error(inode->i_sb, err); + return err; +} + +/* + * ext4_write_inode() + * + * We are called from a few places: + * + * - Within generic_file_write() for O_SYNC files. + * Here, there will be no transaction running. We wait for any running + * trasnaction to commit. + * + * - Within sys_sync(), kupdate and such. + * We wait on commit, if tol to. + * + * - Within prune_icache() (PF_MEMALLOC == true) + * Here we simply return. We can't afford to block kswapd on the + * journal commit. + * + * In all cases it is actually safe for us to return without doing anything, + * because the inode has been copied into a raw inode buffer in + * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for + * knfsd. + * + * Note that we are absolutely dependent upon all inode dirtiers doing the + * right thing: they *must* call mark_inode_dirty() after dirtying info in + * which we are interested. + * + * It would be a bug for them to not do this. The code: + * + * mark_inode_dirty(inode) + * stuff(); + * inode->i_size = expr; + * + * is in error because a kswapd-driven write_inode() could occur while + * `stuff()' is running, and the new i_size will be lost. Plus the inode + * will no longer be on the superblock's dirty inode list. + */ +int ext4_write_inode(struct inode *inode, int wait) +{ + if (current->flags & PF_MEMALLOC) + return 0; + + if (ext4_journal_current_handle()) { + jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); + dump_stack(); + return -EIO; + } + + if (!wait) + return 0; + + return ext4_force_commit(inode->i_sb); +} + +/* + * ext4_setattr() + * + * Called from notify_change. + * + * We want to trap VFS attempts to truncate the file as soon as + * possible. In particular, we want to make sure that when the VFS + * shrinks i_size, we put the inode on the orphan list and modify + * i_disksize immediately, so that during the subsequent flushing of + * dirty pages and freeing of disk blocks, we can guarantee that any + * commit will leave the blocks being flushed in an unused state on + * disk. (On recovery, the inode will get truncated and the blocks will + * be freed, so we have a strong guarantee that no future commit will + * leave these blocks visible to the user.) + * + * Called with inode->sem down. + */ +int ext4_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error, rc = 0; + const unsigned int ia_valid = attr->ia_valid; + + error = inode_change_ok(inode, attr); + if (error) + return error; + + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, + * inode block, ? - but truncate inode update has it) */ + handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ + EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } + error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; + if (error) { + ext4_journal_stop(handle); + return error; + } + /* Update corresponding info in inode so that everything is in + * one transaction */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + error = ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + } + + if (S_ISREG(inode->i_mode) && + attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { + handle_t *handle; + + handle = ext4_journal_start(inode, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } + + error = ext4_orphan_add(handle, inode); + EXT4_I(inode)->i_disksize = attr->ia_size; + rc = ext4_mark_inode_dirty(handle, inode); + if (!error) + error = rc; + ext4_journal_stop(handle); + } + + rc = inode_setattr(inode, attr); + + /* If inode_setattr's call to ext4_truncate failed to get a + * transaction handle at all, we need to clean up the in-core + * orphan list manually. */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + + if (!rc && (ia_valid & ATTR_MODE)) + rc = ext4_acl_chmod(inode); + +err_out: + ext4_std_error(inode->i_sb, error); + if (!error) + error = rc; + return error; +} + + +/* + * How many blocks doth make a writepage()? + * + * With N blocks per page, it may be: + * N data blocks + * 2 indirect block + * 2 dindirect + * 1 tindirect + * N+5 bitmap blocks (from the above) + * N+5 group descriptor summary blocks + * 1 inode block + * 1 superblock. + * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files + * + * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS + * + * With ordered or writeback data it's the same, less the N data blocks. + * + * If the inode's direct blocks can hold an integral number of pages then a + * page cannot straddle two indirect blocks, and we can only touch one indirect + * and dindirect block, and the "5" above becomes "3". + * + * This still overestimates under most circumstances. If we were to pass the + * start and end offsets in here as well we could do block_to_path() on each + * block and work out the exact number of indirects which are touched. Pah. + */ + +int ext4_writepage_trans_blocks(struct inode *inode) +{ + int bpp = ext4_journal_blocks_per_page(inode); + int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; + int ret; + + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_writepage_trans_blocks(inode, bpp); + + if (ext4_should_journal_data(inode)) + ret = 3 * (bpp + indirects) + 2; + else + ret = 2 * (bpp + indirects) + 2; + +#ifdef CONFIG_QUOTA + /* We know that structure was already allocated during DQUOT_INIT so + * we will be updating only the data blocks + inodes */ + ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + return ret; +} + +/* + * The caller must have previously called ext4_reserve_inode_write(). + * Give this, we know that the caller already has write access to iloc->bh. + */ +int ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, struct ext4_iloc *iloc) +{ + int err = 0; + + /* the do_update_inode consumes one bh->b_count */ + get_bh(iloc->bh); + + /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ + err = ext4_do_update_inode(handle, inode, iloc); + put_bh(iloc->bh); + return err; +} + +/* + * On success, We end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. + */ + +int +ext4_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext4_iloc *iloc) +{ + int err = 0; + if (handle) { + err = ext4_get_inode_loc(inode, iloc); + if (!err) { + BUFFER_TRACE(iloc->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, iloc->bh); + if (err) { + brelse(iloc->bh); + iloc->bh = NULL; + } + } + } + ext4_std_error(inode->i_sb, err); + return err; +} + +/* + * What we do here is to mark the in-core inode as clean with respect to inode + * dirtiness (it may still be data-dirty). + * This means that the in-core inode may be reaped by prune_icache + * without having to perform any I/O. This is a very good thing, + * because *any* task may call prune_icache - even ones which + * have a transaction open against a different journal. + * + * Is this cheating? Not really. Sure, we haven't written the + * inode out, but prune_icache isn't a user-visible syncing function. + * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) + * we start and wait on commits. + * + * Is this efficient/effective? Well, we're being nice to the system + * by cleaning up our inodes proactively so they can be reaped + * without I/O. But we are potentially leaving up to five seconds' + * worth of inodes floating about which prune_icache wants us to + * write out. One way to fix that would be to get prune_icache() + * to do a write_super() to free up some memory. It has the desired + * effect. + */ +int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) +{ + struct ext4_iloc iloc; + int err; + + might_sleep(); + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (!err) + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + return err; +} + +/* + * ext4_dirty_inode() is called from __mark_inode_dirty() + * + * We're really interested in the case where a file is being extended. + * i_size has been changed by generic_commit_write() and we thus need + * to include the updated inode in the current transaction. + * + * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks + * are allocated to the file. + * + * If the inode is marked synchronous, we don't honour that here - doing + * so would cause a commit on atime updates, which we don't bother doing. + * We handle synchronous inodes at the highest possible level. + */ +void ext4_dirty_inode(struct inode *inode) +{ + handle_t *current_handle = ext4_journal_current_handle(); + handle_t *handle; + + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) + goto out; + if (current_handle && + current_handle->h_transaction != handle->h_transaction) { + /* This task has a transaction open against a different fs */ + printk(KERN_EMERG "%s: transactions do not match!\n", + __FUNCTION__); + } else { + jbd_debug(5, "marking dirty. outer handle=%p\n", + current_handle); + ext4_mark_inode_dirty(handle, inode); + } + ext4_journal_stop(handle); +out: + return; +} + +#if 0 +/* + * Bind an inode's backing buffer_head into this transaction, to prevent + * it from being flushed to disk early. Unlike + * ext4_reserve_inode_write, this leaves behind no bh reference and + * returns no iloc structure, so the caller needs to repeat the iloc + * lookup to mark the inode dirty later. + */ +static int ext4_pin_inode(handle_t *handle, struct inode *inode) +{ + struct ext4_iloc iloc; + + int err = 0; + if (handle) { + err = ext4_get_inode_loc(inode, &iloc); + if (!err) { + BUFFER_TRACE(iloc.bh, "get_write_access"); + err = jbd2_journal_get_write_access(handle, iloc.bh); + if (!err) + err = ext4_journal_dirty_metadata(handle, + iloc.bh); + brelse(iloc.bh); + } + } + ext4_std_error(inode->i_sb, err); + return err; +} +#endif + +int ext4_change_inode_journal_flag(struct inode *inode, int val) +{ + journal_t *journal; + handle_t *handle; + int err; + + /* + * We have to be very careful here: changing a data block's + * journaling status dynamically is dangerous. If we write a + * data block to the journal, change the status and then delete + * that block, we risk forgetting to revoke the old log record + * from the journal and so a subsequent replay can corrupt data. + * So, first we make sure that the journal is empty and that + * nobody is changing anything. + */ + + journal = EXT4_JOURNAL(inode); + if (is_journal_aborted(journal) || IS_RDONLY(inode)) + return -EROFS; + + jbd2_journal_lock_updates(journal); + jbd2_journal_flush(journal); + + /* + * OK, there are no updates running now, and all cached data is + * synced to disk. We are now in a completely consistent state + * which doesn't have anything in the journal, and we know that + * no filesystem updates are running, so it is safe to modify + * the inode's in-core data-journaling state flag now. + */ + + if (val) + EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; + else + EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; + ext4_set_aops(inode); + + jbd2_journal_unlock_updates(journal); + + /* Finally we can mark the inode as dirty. */ + + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_mark_inode_dirty(handle, inode); + handle->h_sync = 1; + ext4_journal_stop(handle); + ext4_std_error(inode->i_sb, err); + + return err; +} diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c new file mode 100644 index 000000000000..22a737c306c7 --- /dev/null +++ b/fs/ext4/ioctl.c @@ -0,0 +1,306 @@ +/* + * linux/fs/ext4/ioctl.c + * + * Copyright (C) 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/capability.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd2.h> +#include <linux/time.h> +#include <linux/compat.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> + +int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int flags; + unsigned short rsv_window_size; + + ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { + case EXT4_IOC_GETFLAGS: + flags = ei->i_flags & EXT4_FL_USER_VISIBLE; + return put_user(flags, (int __user *) arg); + case EXT4_IOC_SETFLAGS: { + handle_t *handle = NULL; + int err; + struct ext4_iloc iloc; + unsigned int oldflags; + unsigned int jflag; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + if (!S_ISDIR(inode->i_mode)) + flags &= ~EXT4_DIRSYNC_FL; + + mutex_lock(&inode->i_mutex); + oldflags = ei->i_flags; + + /* The JOURNAL_DATA flag is modifiable only by root */ + jflag = flags & EXT4_JOURNAL_DATA_FL; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } + } + + /* + * The JOURNAL_DATA flag can only be changed by + * the relevant capability. + */ + if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { + if (!capable(CAP_SYS_RESOURCE)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } + } + + + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) { + mutex_unlock(&inode->i_mutex); + return PTR_ERR(handle); + } + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + flags = flags & EXT4_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; + ei->i_flags = flags; + + ext4_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME_SEC; + + err = ext4_mark_iloc_dirty(handle, inode, &iloc); +flags_err: + ext4_journal_stop(handle); + if (err) { + mutex_unlock(&inode->i_mutex); + return err; + } + + if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) + err = ext4_change_inode_journal_flag(inode, jflag); + mutex_unlock(&inode->i_mutex); + return err; + } + case EXT4_IOC_GETVERSION: + case EXT4_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int __user *) arg); + case EXT4_IOC_SETVERSION: + case EXT4_IOC_SETVERSION_OLD: { + handle_t *handle; + struct ext4_iloc iloc; + __u32 generation; + int err; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (get_user(generation, (int __user *) arg)) + return -EFAULT; + + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err == 0) { + inode->i_ctime = CURRENT_TIME_SEC; + inode->i_generation = generation; + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + } + ext4_journal_stop(handle); + return err; + } +#ifdef CONFIG_JBD_DEBUG + case EXT4_IOC_WAIT_FOR_READONLY: + /* + * This is racy - by the time we're woken up and running, + * the superblock could be released. And the module could + * have been unloaded. So sue me. + * + * Returns 1 if it slept, else zero. + */ + { + struct super_block *sb = inode->i_sb; + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); + if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) { + schedule(); + ret = 1; + } + remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); + return ret; + } +#endif + case EXT4_IOC_GETRSVSZ: + if (test_opt(inode->i_sb, RESERVATION) + && S_ISREG(inode->i_mode) + && ei->i_block_alloc_info) { + rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; + return put_user(rsv_window_size, (int __user *)arg); + } + return -ENOTTY; + case EXT4_IOC_SETRSVSZ: { + + if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) + return -ENOTTY; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(rsv_window_size, (int __user *)arg)) + return -EFAULT; + + if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) + rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; + + /* + * need to allocate reservation structure for this inode + * before set the window size + */ + mutex_lock(&ei->truncate_mutex); + if (!ei->i_block_alloc_info) + ext4_init_block_alloc_info(inode); + + if (ei->i_block_alloc_info){ + struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; + rsv->rsv_goal_size = rsv_window_size; + } + mutex_unlock(&ei->truncate_mutex); + return 0; + } + case EXT4_IOC_GROUP_EXTEND: { + ext4_fsblk_t n_blocks_count; + struct super_block *sb = inode->i_sb; + int err; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + if (IS_RDONLY(inode)) + return -EROFS; + + if (get_user(n_blocks_count, (__u32 __user *)arg)) + return -EFAULT; + + err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + + return err; + } + case EXT4_IOC_GROUP_ADD: { + struct ext4_new_group_data input; + struct super_block *sb = inode->i_sb; + int err; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + if (IS_RDONLY(inode)) + return -EROFS; + + if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, + sizeof(input))) + return -EFAULT; + + err = ext4_group_add(sb, &input); + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + + return err; + } + + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT4_IOC32_GETFLAGS: + cmd = EXT4_IOC_GETFLAGS; + break; + case EXT4_IOC32_SETFLAGS: + cmd = EXT4_IOC_SETFLAGS; + break; + case EXT4_IOC32_GETVERSION: + cmd = EXT4_IOC_GETVERSION; + break; + case EXT4_IOC32_SETVERSION: + cmd = EXT4_IOC_SETVERSION; + break; + case EXT4_IOC32_GROUP_EXTEND: + cmd = EXT4_IOC_GROUP_EXTEND; + break; + case EXT4_IOC32_GETVERSION_OLD: + cmd = EXT4_IOC_GETVERSION_OLD; + break; + case EXT4_IOC32_SETVERSION_OLD: + cmd = EXT4_IOC_SETVERSION_OLD; + break; +#ifdef CONFIG_JBD_DEBUG + case EXT4_IOC32_WAIT_FOR_READONLY: + cmd = EXT4_IOC_WAIT_FOR_READONLY; + break; +#endif + case EXT4_IOC32_GETRSVSZ: + cmd = EXT4_IOC_GETRSVSZ; + break; + case EXT4_IOC32_SETRSVSZ: + cmd = EXT4_IOC_SETRSVSZ; + break; + case EXT4_IOC_GROUP_ADD: + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c new file mode 100644 index 000000000000..8b1bd03d20f5 --- /dev/null +++ b/fs/ext4/namei.c @@ -0,0 +1,2395 @@ +/* + * linux/fs/ext4/namei.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * Directory entry file type support and forward compatibility hooks + * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 + * Hash Tree Directory indexing (c) + * Daniel Phillips, 2001 + * Hash Tree Directory indexing porting + * Christopher Li, 2002 + * Hash Tree Directory indexing cleanup + * Theodore Ts'o, 2002 + */ + +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/jbd2.h> +#include <linux/time.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd2.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> +#include <linux/bio.h> +#include <linux/smp_lock.h> + +#include "namei.h" +#include "xattr.h" +#include "acl.h" + +/* + * define how far ahead to read directories while searching them. + */ +#define NAMEI_RA_CHUNKS 2 +#define NAMEI_RA_BLOCKS 4 +#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) +#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) + +static struct buffer_head *ext4_append(handle_t *handle, + struct inode *inode, + u32 *block, int *err) +{ + struct buffer_head *bh; + + *block = inode->i_size >> inode->i_sb->s_blocksize_bits; + + if ((bh = ext4_bread(handle, inode, *block, 1, err))) { + inode->i_size += inode->i_sb->s_blocksize; + EXT4_I(inode)->i_disksize = inode->i_size; + ext4_journal_get_write_access(handle,bh); + } + return bh; +} + +#ifndef assert +#define assert(test) J_ASSERT(test) +#endif + +#ifndef swap +#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) +#endif + +#ifdef DX_DEBUG +#define dxtrace(command) command +#else +#define dxtrace(command) +#endif + +struct fake_dirent +{ + __le32 inode; + __le16 rec_len; + u8 name_len; + u8 file_type; +}; + +struct dx_countlimit +{ + __le16 limit; + __le16 count; +}; + +struct dx_entry +{ + __le32 hash; + __le32 block; +}; + +/* + * dx_root_info is laid out so that if it should somehow get overlaid by a + * dirent the two low bits of the hash version will be zero. Therefore, the + * hash version mod 4 should never be 0. Sincerely, the paranoia department. + */ + +struct dx_root +{ + struct fake_dirent dot; + char dot_name[4]; + struct fake_dirent dotdot; + char dotdot_name[4]; + struct dx_root_info + { + __le32 reserved_zero; + u8 hash_version; + u8 info_length; /* 8 */ + u8 indirect_levels; + u8 unused_flags; + } + info; + struct dx_entry entries[0]; +}; + +struct dx_node +{ + struct fake_dirent fake; + struct dx_entry entries[0]; +}; + + +struct dx_frame +{ + struct buffer_head *bh; + struct dx_entry *entries; + struct dx_entry *at; +}; + +struct dx_map_entry +{ + u32 hash; + u32 offs; +}; + +#ifdef CONFIG_EXT4_INDEX +static inline unsigned dx_get_block (struct dx_entry *entry); +static void dx_set_block (struct dx_entry *entry, unsigned value); +static inline unsigned dx_get_hash (struct dx_entry *entry); +static void dx_set_hash (struct dx_entry *entry, unsigned value); +static unsigned dx_get_count (struct dx_entry *entries); +static unsigned dx_get_limit (struct dx_entry *entries); +static void dx_set_count (struct dx_entry *entries, unsigned value); +static void dx_set_limit (struct dx_entry *entries, unsigned value); +static unsigned dx_root_limit (struct inode *dir, unsigned infosize); +static unsigned dx_node_limit (struct inode *dir); +static struct dx_frame *dx_probe(struct dentry *dentry, + struct inode *dir, + struct dx_hash_info *hinfo, + struct dx_frame *frame, + int *err); +static void dx_release (struct dx_frame *frames); +static int dx_make_map (struct ext4_dir_entry_2 *de, int size, + struct dx_hash_info *hinfo, struct dx_map_entry map[]); +static void dx_sort_map(struct dx_map_entry *map, unsigned count); +static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, + struct dx_map_entry *offsets, int count); +static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); +static int ext4_htree_next_block(struct inode *dir, __u32 hash, + struct dx_frame *frame, + struct dx_frame *frames, + __u32 *start_hash); +static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir, int *err); +static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, + struct inode *inode); + +/* + * Future: use high four bits of block for coalesce-on-delete flags + * Mask them off for now. + */ + +static inline unsigned dx_get_block (struct dx_entry *entry) +{ + return le32_to_cpu(entry->block) & 0x00ffffff; +} + +static inline void dx_set_block (struct dx_entry *entry, unsigned value) +{ + entry->block = cpu_to_le32(value); +} + +static inline unsigned dx_get_hash (struct dx_entry *entry) +{ + return le32_to_cpu(entry->hash); +} + +static inline void dx_set_hash (struct dx_entry *entry, unsigned value) +{ + entry->hash = cpu_to_le32(value); +} + +static inline unsigned dx_get_count (struct dx_entry *entries) +{ + return le16_to_cpu(((struct dx_countlimit *) entries)->count); +} + +static inline unsigned dx_get_limit (struct dx_entry *entries) +{ + return le16_to_cpu(((struct dx_countlimit *) entries)->limit); +} + +static inline void dx_set_count (struct dx_entry *entries, unsigned value) +{ + ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); +} + +static inline void dx_set_limit (struct dx_entry *entries, unsigned value) +{ + ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); +} + +static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) +{ + unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - + EXT4_DIR_REC_LEN(2) - infosize; + return 0? 20: entry_space / sizeof(struct dx_entry); +} + +static inline unsigned dx_node_limit (struct inode *dir) +{ + unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); + return 0? 22: entry_space / sizeof(struct dx_entry); +} + +/* + * Debug + */ +#ifdef DX_DEBUG +static void dx_show_index (char * label, struct dx_entry *entries) +{ + int i, n = dx_get_count (entries); + printk("%s index ", label); + for (i = 0; i < n; i++) { + printk("%x->%u ", i? dx_get_hash(entries + i) : + 0, dx_get_block(entries + i)); + } + printk("\n"); +} + +struct stats +{ + unsigned names; + unsigned space; + unsigned bcount; +}; + +static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de, + int size, int show_names) +{ + unsigned names = 0, space = 0; + char *base = (char *) de; + struct dx_hash_info h = *hinfo; + + printk("names: "); + while ((char *) de < base + size) + { + if (de->inode) + { + if (show_names) + { + int len = de->name_len; + char *name = de->name; + while (len--) printk("%c", *name++); + ext4fs_dirhash(de->name, de->name_len, &h); + printk(":%x.%u ", h.hash, + ((char *) de - base)); + } + space += EXT4_DIR_REC_LEN(de->name_len); + names++; + } + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + } + printk("(%i)\n", names); + return (struct stats) { names, space, 1 }; +} + +struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, + struct dx_entry *entries, int levels) +{ + unsigned blocksize = dir->i_sb->s_blocksize; + unsigned count = dx_get_count (entries), names = 0, space = 0, i; + unsigned bcount = 0; + struct buffer_head *bh; + int err; + printk("%i indexed blocks...\n", count); + for (i = 0; i < count; i++, entries++) + { + u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; + u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; + struct stats stats; + printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); + if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; + stats = levels? + dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): + dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); + names += stats.names; + space += stats.space; + bcount += stats.bcount; + brelse (bh); + } + if (bcount) + printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", + names, space/bcount,(space/bcount)*100/blocksize); + return (struct stats) { names, space, bcount}; +} +#endif /* DX_DEBUG */ + +/* + * Probe for a directory leaf block to search. + * + * dx_probe can return ERR_BAD_DX_DIR, which means there was a format + * error in the directory index, and the caller should fall back to + * searching the directory normally. The callers of dx_probe **MUST** + * check for this error code, and make sure it never gets reflected + * back to userspace. + */ +static struct dx_frame * +dx_probe(struct dentry *dentry, struct inode *dir, + struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) +{ + unsigned count, indirect; + struct dx_entry *at, *entries, *p, *q, *m; + struct dx_root *root; + struct buffer_head *bh; + struct dx_frame *frame = frame_in; + u32 hash; + + frame->bh = NULL; + if (dentry) + dir = dentry->d_parent->d_inode; + if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) + goto fail; + root = (struct dx_root *) bh->b_data; + if (root->info.hash_version != DX_HASH_TEA && + root->info.hash_version != DX_HASH_HALF_MD4 && + root->info.hash_version != DX_HASH_LEGACY) { + ext4_warning(dir->i_sb, __FUNCTION__, + "Unrecognised inode hash code %d", + root->info.hash_version); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + hinfo->hash_version = root->info.hash_version; + hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; + if (dentry) + ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); + hash = hinfo->hash; + + if (root->info.unused_flags & 1) { + ext4_warning(dir->i_sb, __FUNCTION__, + "Unimplemented inode hash flags: %#06x", + root->info.unused_flags); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + + if ((indirect = root->info.indirect_levels) > 1) { + ext4_warning(dir->i_sb, __FUNCTION__, + "Unimplemented inode hash depth: %#06x", + root->info.indirect_levels); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + + entries = (struct dx_entry *) (((char *)&root->info) + + root->info.info_length); + assert(dx_get_limit(entries) == dx_root_limit(dir, + root->info.info_length)); + dxtrace (printk("Look up %x", hash)); + while (1) + { + count = dx_get_count(entries); + assert (count && count <= dx_get_limit(entries)); + p = entries + 1; + q = entries + count - 1; + while (p <= q) + { + m = p + (q - p)/2; + dxtrace(printk(".")); + if (dx_get_hash(m) > hash) + q = m - 1; + else + p = m + 1; + } + + if (0) // linear search cross check + { + unsigned n = count - 1; + at = entries; + while (n--) + { + dxtrace(printk(",")); + if (dx_get_hash(++at) > hash) + { + at--; + break; + } + } + assert (at == p - 1); + } + + at = p - 1; + dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); + frame->bh = bh; + frame->entries = entries; + frame->at = at; + if (!indirect--) return frame; + if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) + goto fail2; + at = entries = ((struct dx_node *) bh->b_data)->entries; + assert (dx_get_limit(entries) == dx_node_limit (dir)); + frame++; + } +fail2: + while (frame >= frame_in) { + brelse(frame->bh); + frame--; + } +fail: + return NULL; +} + +static void dx_release (struct dx_frame *frames) +{ + if (frames[0].bh == NULL) + return; + + if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) + brelse(frames[1].bh); + brelse(frames[0].bh); +} + +/* + * This function increments the frame pointer to search the next leaf + * block, and reads in the necessary intervening nodes if the search + * should be necessary. Whether or not the search is necessary is + * controlled by the hash parameter. If the hash value is even, then + * the search is only continued if the next block starts with that + * hash value. This is used if we are searching for a specific file. + * + * If the hash value is HASH_NB_ALWAYS, then always go to the next block. + * + * This function returns 1 if the caller should continue to search, + * or 0 if it should not. If there is an error reading one of the + * index blocks, it will a negative error code. + * + * If start_hash is non-null, it will be filled in with the starting + * hash of the next page. + */ +static int ext4_htree_next_block(struct inode *dir, __u32 hash, + struct dx_frame *frame, + struct dx_frame *frames, + __u32 *start_hash) +{ + struct dx_frame *p; + struct buffer_head *bh; + int err, num_frames = 0; + __u32 bhash; + + p = frame; + /* + * Find the next leaf page by incrementing the frame pointer. + * If we run out of entries in the interior node, loop around and + * increment pointer in the parent node. When we break out of + * this loop, num_frames indicates the number of interior + * nodes need to be read. + */ + while (1) { + if (++(p->at) < p->entries + dx_get_count(p->entries)) + break; + if (p == frames) + return 0; + num_frames++; + p--; + } + + /* + * If the hash is 1, then continue only if the next page has a + * continuation hash of any value. This is used for readdir + * handling. Otherwise, check to see if the hash matches the + * desired contiuation hash. If it doesn't, return since + * there's no point to read in the successive index pages. + */ + bhash = dx_get_hash(p->at); + if (start_hash) + *start_hash = bhash; + if ((hash & 1) == 0) { + if ((bhash & ~1) != hash) + return 0; + } + /* + * If the hash is HASH_NB_ALWAYS, we always go to the next + * block so no check is necessary + */ + while (num_frames--) { + if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), + 0, &err))) + return err; /* Failure */ + p++; + brelse (p->bh); + p->bh = bh; + p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; + } + return 1; +} + + +/* + * p is at least 6 bytes before the end of page + */ +static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) +{ + return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); +} + +/* + * This function fills a red-black tree with information from a + * directory block. It returns the number directory entries loaded + * into the tree. If there is an error it is returned in err. + */ +static int htree_dirblock_to_tree(struct file *dir_file, + struct inode *dir, int block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash) +{ + struct buffer_head *bh; + struct ext4_dir_entry_2 *de, *top; + int err, count = 0; + + dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); + if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) + return err; + + de = (struct ext4_dir_entry_2 *) bh->b_data; + top = (struct ext4_dir_entry_2 *) ((char *) de + + dir->i_sb->s_blocksize - + EXT4_DIR_REC_LEN(0)); + for (; de < top; de = ext4_next_entry(de)) { + ext4fs_dirhash(de->name, de->name_len, hinfo); + if ((hinfo->hash < start_hash) || + ((hinfo->hash == start_hash) && + (hinfo->minor_hash < start_minor_hash))) + continue; + if (de->inode == 0) + continue; + if ((err = ext4_htree_store_dirent(dir_file, + hinfo->hash, hinfo->minor_hash, de)) != 0) { + brelse(bh); + return err; + } + count++; + } + brelse(bh); + return count; +} + + +/* + * This function fills a red-black tree with information from a + * directory. We start scanning the directory in hash order, starting + * at start_hash and start_minor_hash. + * + * This function returns the number of entries inserted into the tree, + * or a negative error code. + */ +int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash) +{ + struct dx_hash_info hinfo; + struct ext4_dir_entry_2 *de; + struct dx_frame frames[2], *frame; + struct inode *dir; + int block, err; + int count = 0; + int ret; + __u32 hashval; + + dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, + start_minor_hash)); + dir = dir_file->f_dentry->d_inode; + if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { + hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; + hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; + count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, + start_hash, start_minor_hash); + *next_hash = ~0; + return count; + } + hinfo.hash = start_hash; + hinfo.minor_hash = 0; + frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err); + if (!frame) + return err; + + /* Add '.' and '..' from the htree header */ + if (!start_hash && !start_minor_hash) { + de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; + if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0) + goto errout; + count++; + } + if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { + de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; + de = ext4_next_entry(de); + if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) + goto errout; + count++; + } + + while (1) { + block = dx_get_block(frame->at); + ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, + start_hash, start_minor_hash); + if (ret < 0) { + err = ret; + goto errout; + } + count += ret; + hashval = ~0; + ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS, + frame, frames, &hashval); + *next_hash = hashval; + if (ret < 0) { + err = ret; + goto errout; + } + /* + * Stop if: (a) there are no more entries, or + * (b) we have inserted at least one entry and the + * next hash value is not a continuation + */ + if ((ret == 0) || + (count && ((hashval & 1) == 0))) + break; + } + dx_release(frames); + dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", + count, *next_hash)); + return count; +errout: + dx_release(frames); + return (err); +} + + +/* + * Directory block splitting, compacting + */ + +static int dx_make_map (struct ext4_dir_entry_2 *de, int size, + struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) +{ + int count = 0; + char *base = (char *) de; + struct dx_hash_info h = *hinfo; + + while ((char *) de < base + size) + { + if (de->name_len && de->inode) { + ext4fs_dirhash(de->name, de->name_len, &h); + map_tail--; + map_tail->hash = h.hash; + map_tail->offs = (u32) ((char *) de - base); + count++; + cond_resched(); + } + /* XXX: do we need to check rec_len == 0 case? -Chris */ + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + } + return count; +} + +static void dx_sort_map (struct dx_map_entry *map, unsigned count) +{ + struct dx_map_entry *p, *q, *top = map + count - 1; + int more; + /* Combsort until bubble sort doesn't suck */ + while (count > 2) { + count = count*10/13; + if (count - 9 < 2) /* 9, 10 -> 11 */ + count = 11; + for (p = top, q = p - count; q >= map; p--, q--) + if (p->hash < q->hash) + swap(*p, *q); + } + /* Garden variety bubble sort */ + do { + more = 0; + q = top; + while (q-- > map) { + if (q[1].hash >= q[0].hash) + continue; + swap(*(q+1), *q); + more = 1; + } + } while(more); +} + +static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) +{ + struct dx_entry *entries = frame->entries; + struct dx_entry *old = frame->at, *new = old + 1; + int count = dx_get_count(entries); + + assert(count < dx_get_limit(entries)); + assert(old < entries + count); + memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); + dx_set_hash(new, hash); + dx_set_block(new, block); + dx_set_count(entries, count + 1); +} +#endif + + +static void ext4_update_dx_flag(struct inode *inode) +{ + if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_COMPAT_DIR_INDEX)) + EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL; +} + +/* + * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure. + * + * `len <= EXT4_NAME_LEN' is guaranteed by caller. + * `de != NULL' is guaranteed by caller. + */ +static inline int ext4_match (int len, const char * const name, + struct ext4_dir_entry_2 * de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * Returns 0 if not found, -1 on failure, and 1 on success + */ +static inline int search_dirblock(struct buffer_head * bh, + struct inode *dir, + struct dentry *dentry, + unsigned long offset, + struct ext4_dir_entry_2 ** res_dir) +{ + struct ext4_dir_entry_2 * de; + char * dlimit; + int de_len; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + + de = (struct ext4_dir_entry_2 *) bh->b_data; + dlimit = bh->b_data + dir->i_sb->s_blocksize; + while ((char *) de < dlimit) { + /* this code is executed quadratically often */ + /* do minimal checking `by hand' */ + + if ((char *) de + namelen <= dlimit && + ext4_match (namelen, name, de)) { + /* found a match - just to be sure, do a full check */ + if (!ext4_check_dir_entry("ext4_find_entry", + dir, de, bh, offset)) + return -1; + *res_dir = de; + return 1; + } + /* prevent looping on a bad block */ + de_len = le16_to_cpu(de->rec_len); + if (de_len <= 0) + return -1; + offset += de_len; + de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); + } + return 0; +} + + +/* + * ext4_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as a parameter - res_dir). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. + * + * The returned buffer_head has ->b_count elevated. The caller is expected + * to brelse() it when appropriate. + */ +static struct buffer_head * ext4_find_entry (struct dentry *dentry, + struct ext4_dir_entry_2 ** res_dir) +{ + struct super_block * sb; + struct buffer_head * bh_use[NAMEI_RA_SIZE]; + struct buffer_head * bh, *ret = NULL; + unsigned long start, block, b; + int ra_max = 0; /* Number of bh's in the readahead + buffer, bh_use[] */ + int ra_ptr = 0; /* Current index into readahead + buffer */ + int num = 0; + int nblocks, i, err; + struct inode *dir = dentry->d_parent->d_inode; + int namelen; + const u8 *name; + unsigned blocksize; + + *res_dir = NULL; + sb = dir->i_sb; + blocksize = sb->s_blocksize; + namelen = dentry->d_name.len; + name = dentry->d_name.name; + if (namelen > EXT4_NAME_LEN) + return NULL; +#ifdef CONFIG_EXT4_INDEX + if (is_dx(dir)) { + bh = ext4_dx_find_entry(dentry, res_dir, &err); + /* + * On success, or if the error was file not found, + * return. Otherwise, fall back to doing a search the + * old fashioned way. + */ + if (bh || (err != ERR_BAD_DX_DIR)) + return bh; + dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); + } +#endif + nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); + start = EXT4_I(dir)->i_dir_start_lookup; + if (start >= nblocks) + start = 0; + block = start; +restart: + do { + /* + * We deal with the read-ahead logic here. + */ + if (ra_ptr >= ra_max) { + /* Refill the readahead buffer */ + ra_ptr = 0; + b = block; + for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { + /* + * Terminate if we reach the end of the + * directory and must wrap, or if our + * search has finished at this block. + */ + if (b >= nblocks || (num && block == start)) { + bh_use[ra_max] = NULL; + break; + } + num++; + bh = ext4_getblk(NULL, dir, b++, 0, &err); + bh_use[ra_max] = bh; + if (bh) + ll_rw_block(READ_META, 1, &bh); + } + } + if ((bh = bh_use[ra_ptr++]) == NULL) + goto next; + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + /* read error, skip block & hope for the best */ + ext4_error(sb, __FUNCTION__, "reading directory #%lu " + "offset %lu", dir->i_ino, block); + brelse(bh); + goto next; + } + i = search_dirblock(bh, dir, dentry, + block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); + if (i == 1) { + EXT4_I(dir)->i_dir_start_lookup = block; + ret = bh; + goto cleanup_and_exit; + } else { + brelse(bh); + if (i < 0) + goto cleanup_and_exit; + } + next: + if (++block >= nblocks) + block = 0; + } while (block != start); + + /* + * If the directory has grown while we were searching, then + * search the last part of the directory before giving up. + */ + block = nblocks; + nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); + if (block < nblocks) { + start = 0; + goto restart; + } + +cleanup_and_exit: + /* Clean up the read-ahead blocks */ + for (; ra_ptr < ra_max; ra_ptr++) + brelse (bh_use[ra_ptr]); + return ret; +} + +#ifdef CONFIG_EXT4_INDEX +static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir, int *err) +{ + struct super_block * sb; + struct dx_hash_info hinfo; + u32 hash; + struct dx_frame frames[2], *frame; + struct ext4_dir_entry_2 *de, *top; + struct buffer_head *bh; + unsigned long block; + int retval; + int namelen = dentry->d_name.len; + const u8 *name = dentry->d_name.name; + struct inode *dir = dentry->d_parent->d_inode; + + sb = dir->i_sb; + /* NFS may look up ".." - look at dx_root directory block */ + if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ + if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) + return NULL; + } else { + frame = frames; + frame->bh = NULL; /* for dx_release() */ + frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ + dx_set_block(frame->at, 0); /* dx_root block is 0 */ + } + hash = hinfo.hash; + do { + block = dx_get_block(frame->at); + if (!(bh = ext4_bread (NULL,dir, block, 0, err))) + goto errout; + de = (struct ext4_dir_entry_2 *) bh->b_data; + top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - + EXT4_DIR_REC_LEN(0)); + for (; de < top; de = ext4_next_entry(de)) + if (ext4_match (namelen, name, de)) { + if (!ext4_check_dir_entry("ext4_find_entry", + dir, de, bh, + (block<<EXT4_BLOCK_SIZE_BITS(sb)) + +((char *)de - bh->b_data))) { + brelse (bh); + goto errout; + } + *res_dir = de; + dx_release (frames); + return bh; + } + brelse (bh); + /* Check to see if we should continue to search */ + retval = ext4_htree_next_block(dir, hash, frame, + frames, NULL); + if (retval < 0) { + ext4_warning(sb, __FUNCTION__, + "error reading index page in directory #%lu", + dir->i_ino); + *err = retval; + goto errout; + } + } while (retval == 1); + + *err = -ENOENT; +errout: + dxtrace(printk("%s not found\n", name)); + dx_release (frames); + return NULL; +} +#endif + +static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode * inode; + struct ext4_dir_entry_2 * de; + struct buffer_head * bh; + + if (dentry->d_name.len > EXT4_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + bh = ext4_find_entry(dentry, &de); + inode = NULL; + if (bh) { + unsigned long ino = le32_to_cpu(de->inode); + brelse (bh); + if (!ext4_valid_inum(dir->i_sb, ino)) { + ext4_error(dir->i_sb, "ext4_lookup", + "bad inode number: %lu", ino); + inode = NULL; + } else + inode = iget(dir->i_sb, ino); + + if (!inode) + return ERR_PTR(-EACCES); + } + return d_splice_alias(inode, dentry); +} + + +struct dentry *ext4_get_parent(struct dentry *child) +{ + unsigned long ino; + struct dentry *parent; + struct inode *inode; + struct dentry dotdot; + struct ext4_dir_entry_2 * de; + struct buffer_head *bh; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + dotdot.d_parent = child; /* confusing, isn't it! */ + + bh = ext4_find_entry(&dotdot, &de); + inode = NULL; + if (!bh) + return ERR_PTR(-ENOENT); + ino = le32_to_cpu(de->inode); + brelse(bh); + + if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { + ext4_error(child->d_inode->i_sb, "ext4_get_parent", + "bad inode number: %lu", ino); + inode = NULL; + } else + inode = iget(child->d_inode->i_sb, ino); + + if (!inode) + return ERR_PTR(-EACCES); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + parent = ERR_PTR(-ENOMEM); + } + return parent; +} + +#define S_SHIFT 12 +static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, + [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK, + [S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK, +}; + +static inline void ext4_set_de_type(struct super_block *sb, + struct ext4_dir_entry_2 *de, + umode_t mode) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) + de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; +} + +#ifdef CONFIG_EXT4_INDEX +static struct ext4_dir_entry_2 * +dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) +{ + unsigned rec_len = 0; + + while (count--) { + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); + rec_len = EXT4_DIR_REC_LEN(de->name_len); + memcpy (to, de, rec_len); + ((struct ext4_dir_entry_2 *) to)->rec_len = + cpu_to_le16(rec_len); + de->inode = 0; + map++; + to += rec_len; + } + return (struct ext4_dir_entry_2 *) (to - rec_len); +} + +static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) +{ + struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; + unsigned rec_len = 0; + + prev = to = de; + while ((char*)de < base + size) { + next = (struct ext4_dir_entry_2 *) ((char *) de + + le16_to_cpu(de->rec_len)); + if (de->inode && de->name_len) { + rec_len = EXT4_DIR_REC_LEN(de->name_len); + if (de > to) + memmove(to, de, rec_len); + to->rec_len = cpu_to_le16(rec_len); + prev = to; + to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); + } + de = next; + } + return prev; +} + +static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, + struct buffer_head **bh,struct dx_frame *frame, + struct dx_hash_info *hinfo, int *error) +{ + unsigned blocksize = dir->i_sb->s_blocksize; + unsigned count, continued; + struct buffer_head *bh2; + u32 newblock; + u32 hash2; + struct dx_map_entry *map; + char *data1 = (*bh)->b_data, *data2; + unsigned split; + struct ext4_dir_entry_2 *de = NULL, *de2; + int err; + + bh2 = ext4_append (handle, dir, &newblock, error); + if (!(bh2)) { + brelse(*bh); + *bh = NULL; + goto errout; + } + + BUFFER_TRACE(*bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, *bh); + if (err) { + journal_error: + brelse(*bh); + brelse(bh2); + *bh = NULL; + ext4_std_error(dir->i_sb, err); + goto errout; + } + BUFFER_TRACE(frame->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, frame->bh); + if (err) + goto journal_error; + + data2 = bh2->b_data; + + /* create map in the end of data2 block */ + map = (struct dx_map_entry *) (data2 + blocksize); + count = dx_make_map ((struct ext4_dir_entry_2 *) data1, + blocksize, hinfo, map); + map -= count; + split = count/2; // need to adjust to actual middle + dx_sort_map (map, count); + hash2 = map[split].hash; + continued = hash2 == map[split - 1].hash; + dxtrace(printk("Split block %i at %x, %i/%i\n", + dx_get_block(frame->at), hash2, split, count-split)); + + /* Fancy dance to stay within two buffers */ + de2 = dx_move_dirents(data1, data2, map + split, count - split); + de = dx_pack_dirents(data1,blocksize); + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); + de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); + dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); + dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); + + /* Which block gets the new entry? */ + if (hinfo->hash >= hash2) + { + swap(*bh, bh2); + de = de2; + } + dx_insert_block (frame, hash2 + continued, newblock); + err = ext4_journal_dirty_metadata (handle, bh2); + if (err) + goto journal_error; + err = ext4_journal_dirty_metadata (handle, frame->bh); + if (err) + goto journal_error; + brelse (bh2); + dxtrace(dx_show_index ("frame", frame->entries)); +errout: + return de; +} +#endif + + +/* + * Add a new entry into a directory (leaf) block. If de is non-NULL, + * it points to a directory entry which is guaranteed to be large + * enough for new directory entry. If de is NULL, then + * add_dirent_to_buf will attempt search the directory block for + * space. It will return -ENOSPC if no space is available, and -EIO + * and -EEXIST if directory entry already exists. + * + * NOTE! bh is NOT released in the case where ENOSPC is returned. In + * all other cases bh is released. + */ +static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, + struct inode *inode, struct ext4_dir_entry_2 *de, + struct buffer_head * bh) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned long offset = 0; + unsigned short reclen; + int nlen, rlen, err; + char *top; + + reclen = EXT4_DIR_REC_LEN(namelen); + if (!de) { + de = (struct ext4_dir_entry_2 *)bh->b_data; + top = bh->b_data + dir->i_sb->s_blocksize - reclen; + while ((char *) de <= top) { + if (!ext4_check_dir_entry("ext4_add_entry", dir, de, + bh, offset)) { + brelse (bh); + return -EIO; + } + if (ext4_match (namelen, name, de)) { + brelse (bh); + return -EEXIST; + } + nlen = EXT4_DIR_REC_LEN(de->name_len); + rlen = le16_to_cpu(de->rec_len); + if ((de->inode? rlen - nlen: rlen) >= reclen) + break; + de = (struct ext4_dir_entry_2 *)((char *)de + rlen); + offset += rlen; + } + if ((char *) de > top) + return -ENOSPC; + } + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, bh); + if (err) { + ext4_std_error(dir->i_sb, err); + brelse(bh); + return err; + } + + /* By now the buffer is marked for journaling */ + nlen = EXT4_DIR_REC_LEN(de->name_len); + rlen = le16_to_cpu(de->rec_len); + if (de->inode) { + struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); + de1->rec_len = cpu_to_le16(rlen - nlen); + de->rec_len = cpu_to_le16(nlen); + de = de1; + } + de->file_type = EXT4_FT_UNKNOWN; + if (inode) { + de->inode = cpu_to_le32(inode->i_ino); + ext4_set_de_type(dir->i_sb, de, inode->i_mode); + } else + de->inode = 0; + de->name_len = namelen; + memcpy (de->name, name, namelen); + /* + * XXX shouldn't update any times until successful + * completion of syscall, but too many callers depend + * on this. + * + * XXX similarly, too many callers depend on + * ext4_new_inode() setting the times, but error + * recovery deletes the inode, so the worst that can + * happen is that the times are slightly out of date + * and/or different from the directory change time. + */ + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + ext4_update_dx_flag(dir); + dir->i_version++; + ext4_mark_inode_dirty(handle, dir); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh); + if (err) + ext4_std_error(dir->i_sb, err); + brelse(bh); + return 0; +} + +#ifdef CONFIG_EXT4_INDEX +/* + * This converts a one block unindexed directory to a 3 block indexed + * directory, and adds the dentry to the indexed directory. + */ +static int make_indexed_dir(handle_t *handle, struct dentry *dentry, + struct inode *inode, struct buffer_head *bh) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct buffer_head *bh2; + struct dx_root *root; + struct dx_frame frames[2], *frame; + struct dx_entry *entries; + struct ext4_dir_entry_2 *de, *de2; + char *data1, *top; + unsigned len; + int retval; + unsigned blocksize; + struct dx_hash_info hinfo; + u32 block; + struct fake_dirent *fde; + + blocksize = dir->i_sb->s_blocksize; + dxtrace(printk("Creating index\n")); + retval = ext4_journal_get_write_access(handle, bh); + if (retval) { + ext4_std_error(dir->i_sb, retval); + brelse(bh); + return retval; + } + root = (struct dx_root *) bh->b_data; + + bh2 = ext4_append (handle, dir, &block, &retval); + if (!(bh2)) { + brelse(bh); + return retval; + } + EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; + data1 = bh2->b_data; + + /* The 0th block becomes the root, move the dirents out */ + fde = &root->dotdot; + de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); + len = ((char *) root) + blocksize - (char *) de; + memcpy (data1, de, len); + de = (struct ext4_dir_entry_2 *) data1; + top = data1 + len; + while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) + de = de2; + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); + /* Initialize the root; the dot dirents already exist */ + de = (struct ext4_dir_entry_2 *) (&root->dotdot); + de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2)); + memset (&root->info, 0, sizeof(root->info)); + root->info.info_length = sizeof(root->info); + root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; + entries = root->entries; + dx_set_block (entries, 1); + dx_set_count (entries, 1); + dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); + + /* Initialize as for dx_probe */ + hinfo.hash_version = root->info.hash_version; + hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; + ext4fs_dirhash(name, namelen, &hinfo); + frame = frames; + frame->entries = entries; + frame->at = entries; + frame->bh = bh; + bh = bh2; + de = do_split(handle,dir, &bh, frame, &hinfo, &retval); + dx_release (frames); + if (!(de)) + return retval; + + return add_dirent_to_buf(handle, dentry, inode, de, bh); +} +#endif + +/* + * ext4_add_entry() + * + * adds a file entry to the specified directory, using the same + * semantics as ext4_find_entry(). It returns NULL if it failed. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. + */ +static int ext4_add_entry (handle_t *handle, struct dentry *dentry, + struct inode *inode) +{ + struct inode *dir = dentry->d_parent->d_inode; + unsigned long offset; + struct buffer_head * bh; + struct ext4_dir_entry_2 *de; + struct super_block * sb; + int retval; +#ifdef CONFIG_EXT4_INDEX + int dx_fallback=0; +#endif + unsigned blocksize; + u32 block, blocks; + + sb = dir->i_sb; + blocksize = sb->s_blocksize; + if (!dentry->d_name.len) + return -EINVAL; +#ifdef CONFIG_EXT4_INDEX + if (is_dx(dir)) { + retval = ext4_dx_add_entry(handle, dentry, inode); + if (!retval || (retval != ERR_BAD_DX_DIR)) + return retval; + EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL; + dx_fallback++; + ext4_mark_inode_dirty(handle, dir); + } +#endif + blocks = dir->i_size >> sb->s_blocksize_bits; + for (block = 0, offset = 0; block < blocks; block++) { + bh = ext4_bread(handle, dir, block, 0, &retval); + if(!bh) + return retval; + retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); + if (retval != -ENOSPC) + return retval; + +#ifdef CONFIG_EXT4_INDEX + if (blocks == 1 && !dx_fallback && + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) + return make_indexed_dir(handle, dentry, inode, bh); +#endif + brelse(bh); + } + bh = ext4_append(handle, dir, &block, &retval); + if (!bh) + return retval; + de = (struct ext4_dir_entry_2 *) bh->b_data; + de->inode = 0; + de->rec_len = cpu_to_le16(blocksize); + return add_dirent_to_buf(handle, dentry, inode, de, bh); +} + +#ifdef CONFIG_EXT4_INDEX +/* + * Returns 0 for success, or a negative error value + */ +static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, + struct inode *inode) +{ + struct dx_frame frames[2], *frame; + struct dx_entry *entries, *at; + struct dx_hash_info hinfo; + struct buffer_head * bh; + struct inode *dir = dentry->d_parent->d_inode; + struct super_block * sb = dir->i_sb; + struct ext4_dir_entry_2 *de; + int err; + + frame = dx_probe(dentry, NULL, &hinfo, frames, &err); + if (!frame) + return err; + entries = frame->entries; + at = frame->at; + + if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) + goto cleanup; + + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, bh); + if (err) + goto journal_error; + + err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); + if (err != -ENOSPC) { + bh = NULL; + goto cleanup; + } + + /* Block full, should compress but for now just split */ + dxtrace(printk("using %u of %u node entries\n", + dx_get_count(entries), dx_get_limit(entries))); + /* Need to split index? */ + if (dx_get_count(entries) == dx_get_limit(entries)) { + u32 newblock; + unsigned icount = dx_get_count(entries); + int levels = frame - frames; + struct dx_entry *entries2; + struct dx_node *node2; + struct buffer_head *bh2; + + if (levels && (dx_get_count(frames->entries) == + dx_get_limit(frames->entries))) { + ext4_warning(sb, __FUNCTION__, + "Directory index full!"); + err = -ENOSPC; + goto cleanup; + } + bh2 = ext4_append (handle, dir, &newblock, &err); + if (!(bh2)) + goto cleanup; + node2 = (struct dx_node *)(bh2->b_data); + entries2 = node2->entries; + node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); + node2->fake.inode = 0; + BUFFER_TRACE(frame->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, frame->bh); + if (err) + goto journal_error; + if (levels) { + unsigned icount1 = icount/2, icount2 = icount - icount1; + unsigned hash2 = dx_get_hash(entries + icount1); + dxtrace(printk("Split index %i/%i\n", icount1, icount2)); + + BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ + err = ext4_journal_get_write_access(handle, + frames[0].bh); + if (err) + goto journal_error; + + memcpy ((char *) entries2, (char *) (entries + icount1), + icount2 * sizeof(struct dx_entry)); + dx_set_count (entries, icount1); + dx_set_count (entries2, icount2); + dx_set_limit (entries2, dx_node_limit(dir)); + + /* Which index block gets the new entry? */ + if (at - entries >= icount1) { + frame->at = at = at - entries - icount1 + entries2; + frame->entries = entries = entries2; + swap(frame->bh, bh2); + } + dx_insert_block (frames + 0, hash2, newblock); + dxtrace(dx_show_index ("node", frames[1].entries)); + dxtrace(dx_show_index ("node", + ((struct dx_node *) bh2->b_data)->entries)); + err = ext4_journal_dirty_metadata(handle, bh2); + if (err) + goto journal_error; + brelse (bh2); + } else { + dxtrace(printk("Creating second level index...\n")); + memcpy((char *) entries2, (char *) entries, + icount * sizeof(struct dx_entry)); + dx_set_limit(entries2, dx_node_limit(dir)); + + /* Set up root */ + dx_set_count(entries, 1); + dx_set_block(entries + 0, newblock); + ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; + + /* Add new access path frame */ + frame = frames + 1; + frame->at = at = at - entries + entries2; + frame->entries = entries = entries2; + frame->bh = bh2; + err = ext4_journal_get_write_access(handle, + frame->bh); + if (err) + goto journal_error; + } + ext4_journal_dirty_metadata(handle, frames[0].bh); + } + de = do_split(handle, dir, &bh, frame, &hinfo, &err); + if (!de) + goto cleanup; + err = add_dirent_to_buf(handle, dentry, inode, de, bh); + bh = NULL; + goto cleanup; + +journal_error: + ext4_std_error(dir->i_sb, err); +cleanup: + if (bh) + brelse(bh); + dx_release(frames); + return err; +} +#endif + +/* + * ext4_delete_entry deletes a directory entry by merging it with the + * previous entry + */ +static int ext4_delete_entry (handle_t *handle, + struct inode * dir, + struct ext4_dir_entry_2 * de_del, + struct buffer_head * bh) +{ + struct ext4_dir_entry_2 * de, * pde; + int i; + + i = 0; + pde = NULL; + de = (struct ext4_dir_entry_2 *) bh->b_data; + while (i < bh->b_size) { + if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i)) + return -EIO; + if (de == de_del) { + BUFFER_TRACE(bh, "get_write_access"); + ext4_journal_get_write_access(handle, bh); + if (pde) + pde->rec_len = + cpu_to_le16(le16_to_cpu(pde->rec_len) + + le16_to_cpu(de->rec_len)); + else + de->inode = 0; + dir->i_version++; + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, bh); + return 0; + } + i += le16_to_cpu(de->rec_len); + pde = de; + de = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + } + return -ENOENT; +} + +/* + * ext4_mark_inode_dirty is somewhat expensive, so unlike ext2 we + * do not perform it in these functions. We perform it at the call site, + * if it is needed. + */ +static inline void ext4_inc_count(handle_t *handle, struct inode *inode) +{ + inc_nlink(inode); +} + +static inline void ext4_dec_count(handle_t *handle, struct inode *inode) +{ + drop_nlink(inode); +} + +static int ext4_add_nondir(handle_t *handle, + struct dentry *dentry, struct inode *inode) +{ + int err = ext4_add_entry(handle, dentry, inode); + if (!err) { + ext4_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + return 0; + } + ext4_dec_count(handle, inode); + iput(inode); + return err; +} + +/* + * By the time this is called, we already have created + * the directory cache entry for the new file, but it + * is so far negative - it has no inode. + * + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) +{ + handle_t *handle; + struct inode * inode; + int err, retries = 0; + +retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext4_new_inode (handle, dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext4_file_inode_operations; + inode->i_fop = &ext4_file_operations; + ext4_set_aops(inode); + err = ext4_add_nondir(handle, dentry, inode); + } + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +static int ext4_mknod (struct inode * dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + handle_t *handle; + struct inode *inode; + int err, retries = 0; + + if (!new_valid_dev(rdev)) + return -EINVAL; + +retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext4_new_inode (handle, dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, inode->i_mode, rdev); +#ifdef CONFIG_EXT4DEV_FS_XATTR + inode->i_op = &ext4_special_inode_operations; +#endif + err = ext4_add_nondir(handle, dentry, inode); + } + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + handle_t *handle; + struct inode * inode; + struct buffer_head * dir_block; + struct ext4_dir_entry_2 * de; + int err, retries = 0; + + if (dir->i_nlink >= EXT4_LINK_MAX) + return -EMLINK; + +retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext4_new_inode (handle, dir, S_IFDIR | mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; + + inode->i_op = &ext4_dir_inode_operations; + inode->i_fop = &ext4_dir_operations; + inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; + dir_block = ext4_bread (handle, inode, 0, 1, &err); + if (!dir_block) { + drop_nlink(inode); /* is this nlink == 0? */ + ext4_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; + } + BUFFER_TRACE(dir_block, "get_write_access"); + ext4_journal_get_write_access(handle, dir_block); + de = (struct ext4_dir_entry_2 *) dir_block->b_data; + de->inode = cpu_to_le32(inode->i_ino); + de->name_len = 1; + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len)); + strcpy (de->name, "."); + ext4_set_de_type(dir->i_sb, de, S_IFDIR); + de = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + de->inode = cpu_to_le32(dir->i_ino); + de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1)); + de->name_len = 2; + strcpy (de->name, ".."); + ext4_set_de_type(dir->i_sb, de, S_IFDIR); + inode->i_nlink = 2; + BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, dir_block); + brelse (dir_block); + ext4_mark_inode_dirty(handle, inode); + err = ext4_add_entry (handle, dentry, inode); + if (err) { + inode->i_nlink = 0; + ext4_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; + } + inc_nlink(dir); + ext4_update_dx_flag(dir); + ext4_mark_inode_dirty(handle, dir); + d_instantiate(dentry, inode); +out_stop: + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +static int empty_dir (struct inode * inode) +{ + unsigned long offset; + struct buffer_head * bh; + struct ext4_dir_entry_2 * de, * de1; + struct super_block * sb; + int err = 0; + + sb = inode->i_sb; + if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || + !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { + if (err) + ext4_error(inode->i_sb, __FUNCTION__, + "error %d reading directory #%lu offset 0", + err, inode->i_ino); + else + ext4_warning(inode->i_sb, __FUNCTION__, + "bad directory (dir #%lu) - no data block", + inode->i_ino); + return 1; + } + de = (struct ext4_dir_entry_2 *) bh->b_data; + de1 = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + if (le32_to_cpu(de->inode) != inode->i_ino || + !le32_to_cpu(de1->inode) || + strcmp (".", de->name) || + strcmp ("..", de1->name)) { + ext4_warning (inode->i_sb, "empty_dir", + "bad directory (dir #%lu) - no `.' or `..'", + inode->i_ino); + brelse (bh); + return 1; + } + offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); + de = (struct ext4_dir_entry_2 *) + ((char *) de1 + le16_to_cpu(de1->rec_len)); + while (offset < inode->i_size ) { + if (!bh || + (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + err = 0; + brelse (bh); + bh = ext4_bread (NULL, inode, + offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); + if (!bh) { + if (err) + ext4_error(sb, __FUNCTION__, + "error %d reading directory" + " #%lu offset %lu", + err, inode->i_ino, offset); + offset += sb->s_blocksize; + continue; + } + de = (struct ext4_dir_entry_2 *) bh->b_data; + } + if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) { + de = (struct ext4_dir_entry_2 *)(bh->b_data + + sb->s_blocksize); + offset = (offset | (sb->s_blocksize - 1)) + 1; + continue; + } + if (le32_to_cpu(de->inode)) { + brelse (bh); + return 0; + } + offset += le16_to_cpu(de->rec_len); + de = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + } + brelse (bh); + return 1; +} + +/* ext4_orphan_add() links an unlinked or truncated inode into a list of + * such inodes, starting at the superblock, in case we crash before the + * file is closed/deleted, or in case the inode truncate spans multiple + * transactions and the last transaction is not recovered after a crash. + * + * At filesystem recovery time, we walk this list deleting unlinked + * inodes and truncating linked inodes in ext4_orphan_cleanup(). + */ +int ext4_orphan_add(handle_t *handle, struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct ext4_iloc iloc; + int err = 0, rc; + + lock_super(sb); + if (!list_empty(&EXT4_I(inode)->i_orphan)) + goto out_unlock; + + /* Orphan handling is only valid for files with data blocks + * being truncated, or files being unlinked. */ + + /* @@@ FIXME: Observation from aviro: + * I think I can trigger J_ASSERT in ext4_orphan_add(). We block + * here (on lock_super()), so race with ext4_link() which might bump + * ->i_nlink. For, say it, character device. Not a regular file, + * not a directory, not a symlink and ->i_nlink > 0. + */ + J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); + + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) + goto out_unlock; + + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto out_unlock; + + /* Insert this inode at the head of the on-disk orphan list... */ + NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); + EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + rc = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (!err) + err = rc; + + /* Only add to the head of the in-memory list if all the + * previous operations succeeded. If the orphan_add is going to + * fail (possibly taking the journal offline), we can't risk + * leaving the inode on the orphan list: stray orphan-list + * entries can cause panics at unmount time. + * + * This is safe: on error we're going to ignore the orphan list + * anyway on the next recovery. */ + if (!err) + list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); + + jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); + jbd_debug(4, "orphan inode %lu will point to %d\n", + inode->i_ino, NEXT_ORPHAN(inode)); +out_unlock: + unlock_super(sb); + ext4_std_error(inode->i_sb, err); + return err; +} + +/* + * ext4_orphan_del() removes an unlinked or truncated inode from the list + * of such inodes stored on disk, because it is finally being cleaned up. + */ +int ext4_orphan_del(handle_t *handle, struct inode *inode) +{ + struct list_head *prev; + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_sb_info *sbi; + unsigned long ino_next; + struct ext4_iloc iloc; + int err = 0; + + lock_super(inode->i_sb); + if (list_empty(&ei->i_orphan)) { + unlock_super(inode->i_sb); + return 0; + } + + ino_next = NEXT_ORPHAN(inode); + prev = ei->i_orphan.prev; + sbi = EXT4_SB(inode->i_sb); + + jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); + + list_del_init(&ei->i_orphan); + + /* If we're on an error path, we may not have a valid + * transaction handle with which to update the orphan list on + * disk, but we still need to remove the inode from the linked + * list in memory. */ + if (!handle) + goto out; + + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto out_err; + + if (prev == &sbi->s_orphan) { + jbd_debug(4, "superblock will point to %lu\n", ino_next); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sbi->s_sbh); + if (err) + goto out_brelse; + sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + err = ext4_journal_dirty_metadata(handle, sbi->s_sbh); + } else { + struct ext4_iloc iloc2; + struct inode *i_prev = + &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; + + jbd_debug(4, "orphan inode %lu will point to %lu\n", + i_prev->i_ino, ino_next); + err = ext4_reserve_inode_write(handle, i_prev, &iloc2); + if (err) + goto out_brelse; + NEXT_ORPHAN(i_prev) = ino_next; + err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); + } + if (err) + goto out_brelse; + NEXT_ORPHAN(inode) = 0; + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + +out_err: + ext4_std_error(inode->i_sb, err); +out: + unlock_super(inode->i_sb); + return err; + +out_brelse: + brelse(iloc.bh); + goto out_err; +} + +static int ext4_rmdir (struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct ext4_dir_entry_2 * de; + handle_t *handle; + + /* Initialize quotas before so that eventual writes go in + * separate transaction */ + DQUOT_INIT(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + retval = -ENOENT; + bh = ext4_find_entry (dentry, &de); + if (!bh) + goto end_rmdir; + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = dentry->d_inode; + + retval = -EIO; + if (le32_to_cpu(de->inode) != inode->i_ino) + goto end_rmdir; + + retval = -ENOTEMPTY; + if (!empty_dir (inode)) + goto end_rmdir; + + retval = ext4_delete_entry(handle, dir, de, bh); + if (retval) + goto end_rmdir; + if (inode->i_nlink != 2) + ext4_warning (inode->i_sb, "ext4_rmdir", + "empty directory has nlink!=2 (%d)", + inode->i_nlink); + inode->i_version++; + clear_nlink(inode); + /* There's no need to set i_disksize: the fact that i_nlink is + * zero will ensure that the right thing happens during any + * recovery. */ + inode->i_size = 0; + ext4_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + ext4_mark_inode_dirty(handle, inode); + drop_nlink(dir); + ext4_update_dx_flag(dir); + ext4_mark_inode_dirty(handle, dir); + +end_rmdir: + ext4_journal_stop(handle); + brelse (bh); + return retval; +} + +static int ext4_unlink(struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct ext4_dir_entry_2 * de; + handle_t *handle; + + /* Initialize quotas before so that eventual writes go + * in separate transaction */ + DQUOT_INIT(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + retval = -ENOENT; + bh = ext4_find_entry (dentry, &de); + if (!bh) + goto end_unlink; + + inode = dentry->d_inode; + + retval = -EIO; + if (le32_to_cpu(de->inode) != inode->i_ino) + goto end_unlink; + + if (!inode->i_nlink) { + ext4_warning (inode->i_sb, "ext4_unlink", + "Deleting nonexistent file (%lu), %d", + inode->i_ino, inode->i_nlink); + inode->i_nlink = 1; + } + retval = ext4_delete_entry(handle, dir, de, bh); + if (retval) + goto end_unlink; + dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + ext4_update_dx_flag(dir); + ext4_mark_inode_dirty(handle, dir); + drop_nlink(inode); + if (!inode->i_nlink) + ext4_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime; + ext4_mark_inode_dirty(handle, inode); + retval = 0; + +end_unlink: + ext4_journal_stop(handle); + brelse (bh); + return retval; +} + +static int ext4_symlink (struct inode * dir, + struct dentry *dentry, const char * symname) +{ + handle_t *handle; + struct inode * inode; + int l, err, retries = 0; + + l = strlen(symname)+1; + if (l > dir->i_sb->s_blocksize) + return -ENAMETOOLONG; + +retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; + + if (l > sizeof (EXT4_I(inode)->i_data)) { + inode->i_op = &ext4_symlink_inode_operations; + ext4_set_aops(inode); + /* + * page_symlink() calls into ext4_prepare/commit_write. + * We have a transaction open. All is sweetness. It also sets + * i_size in generic_commit_write(). + */ + err = __page_symlink(inode, symname, l, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + if (err) { + ext4_dec_count(handle, inode); + ext4_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; + } + } else { + inode->i_op = &ext4_fast_symlink_inode_operations; + memcpy((char*)&EXT4_I(inode)->i_data,symname,l); + inode->i_size = l-1; + } + EXT4_I(inode)->i_disksize = inode->i_size; + err = ext4_add_nondir(handle, dentry, inode); +out_stop: + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +static int ext4_link (struct dentry * old_dentry, + struct inode * dir, struct dentry *dentry) +{ + handle_t *handle; + struct inode *inode = old_dentry->d_inode; + int err, retries = 0; + + if (inode->i_nlink >= EXT4_LINK_MAX) + return -EMLINK; + +retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode->i_ctime = CURRENT_TIME_SEC; + ext4_inc_count(handle, inode); + atomic_inc(&inode->i_count); + + err = ext4_add_nondir(handle, dentry, inode); + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +#define PARENT_INO(buffer) \ + ((struct ext4_dir_entry_2 *) ((char *) buffer + \ + le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode + +/* + * Anybody can rename anything with this: the permission checks are left to the + * higher-level routines. + */ +static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir,struct dentry *new_dentry) +{ + handle_t *handle; + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct ext4_dir_entry_2 * old_de, * new_de; + int retval; + + old_bh = new_bh = dir_bh = NULL; + + /* Initialize quotas before so that eventual writes go + * in separate transaction */ + if (new_dentry->d_inode) + DQUOT_INIT(new_dentry->d_inode); + handle = ext4_journal_start(old_dir, 2 * + EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + handle->h_sync = 1; + + old_bh = ext4_find_entry (old_dentry, &old_de); + /* + * Check for inode number is _not_ due to possible IO errors. + * We might rmdir the source, keep it as pwd of some process + * and merrily kill the link to whatever was created under the + * same name. Goodbye sticky bit ;-< + */ + old_inode = old_dentry->d_inode; + retval = -ENOENT; + if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) + goto end_rename; + + new_inode = new_dentry->d_inode; + new_bh = ext4_find_entry (new_dentry, &new_de); + if (new_bh) { + if (!new_inode) { + brelse (new_bh); + new_bh = NULL; + } + } + if (S_ISDIR(old_inode->i_mode)) { + if (new_inode) { + retval = -ENOTEMPTY; + if (!empty_dir (new_inode)) + goto end_rename; + } + retval = -EIO; + dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); + if (!dir_bh) + goto end_rename; + if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; + if (!new_inode && new_dir!=old_dir && + new_dir->i_nlink >= EXT4_LINK_MAX) + goto end_rename; + } + if (!new_bh) { + retval = ext4_add_entry (handle, new_dentry, old_inode); + if (retval) + goto end_rename; + } else { + BUFFER_TRACE(new_bh, "get write access"); + ext4_journal_get_write_access(handle, new_bh); + new_de->inode = cpu_to_le32(old_inode->i_ino); + if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, + EXT4_FEATURE_INCOMPAT_FILETYPE)) + new_de->file_type = old_de->file_type; + new_dir->i_version++; + BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, new_bh); + brelse(new_bh); + new_bh = NULL; + } + + /* + * Like most other Unix systems, set the ctime for inodes on a + * rename. + */ + old_inode->i_ctime = CURRENT_TIME_SEC; + ext4_mark_inode_dirty(handle, old_inode); + + /* + * ok, that's it + */ + if (le32_to_cpu(old_de->inode) != old_inode->i_ino || + old_de->name_len != old_dentry->d_name.len || + strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || + (retval = ext4_delete_entry(handle, old_dir, + old_de, old_bh)) == -ENOENT) { + /* old_de could have moved from under us during htree split, so + * make sure that we are deleting the right entry. We might + * also be pointing to a stale entry in the unused part of + * old_bh so just checking inum and the name isn't enough. */ + struct buffer_head *old_bh2; + struct ext4_dir_entry_2 *old_de2; + + old_bh2 = ext4_find_entry(old_dentry, &old_de2); + if (old_bh2) { + retval = ext4_delete_entry(handle, old_dir, + old_de2, old_bh2); + brelse(old_bh2); + } + } + if (retval) { + ext4_warning(old_dir->i_sb, "ext4_rename", + "Deleting old file (%lu), %d, error=%d", + old_dir->i_ino, old_dir->i_nlink, retval); + } + + if (new_inode) { + drop_nlink(new_inode); + new_inode->i_ctime = CURRENT_TIME_SEC; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; + ext4_update_dx_flag(old_dir); + if (dir_bh) { + BUFFER_TRACE(dir_bh, "get_write_access"); + ext4_journal_get_write_access(handle, dir_bh); + PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); + BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, dir_bh); + drop_nlink(old_dir); + if (new_inode) { + drop_nlink(new_inode); + } else { + inc_nlink(new_dir); + ext4_update_dx_flag(new_dir); + ext4_mark_inode_dirty(handle, new_dir); + } + } + ext4_mark_inode_dirty(handle, old_dir); + if (new_inode) { + ext4_mark_inode_dirty(handle, new_inode); + if (!new_inode->i_nlink) + ext4_orphan_add(handle, new_inode); + } + retval = 0; + +end_rename: + brelse (dir_bh); + brelse (old_bh); + brelse (new_bh); + ext4_journal_stop(handle); + return retval; +} + +/* + * directories can handle most operations... + */ +struct inode_operations ext4_dir_inode_operations = { + .create = ext4_create, + .lookup = ext4_lookup, + .link = ext4_link, + .unlink = ext4_unlink, + .symlink = ext4_symlink, + .mkdir = ext4_mkdir, + .rmdir = ext4_rmdir, + .mknod = ext4_mknod, + .rename = ext4_rename, + .setattr = ext4_setattr, +#ifdef CONFIG_EXT4DEV_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext4_listxattr, + .removexattr = generic_removexattr, +#endif + .permission = ext4_permission, +}; + +struct inode_operations ext4_special_inode_operations = { + .setattr = ext4_setattr, +#ifdef CONFIG_EXT4DEV_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext4_listxattr, + .removexattr = generic_removexattr, +#endif + .permission = ext4_permission, +}; diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h new file mode 100644 index 000000000000..5e4dfff36a00 --- /dev/null +++ b/fs/ext4/namei.h @@ -0,0 +1,8 @@ +/* linux/fs/ext4/namei.h + * + * Copyright (C) 2005 Simtec Electronics + * Ben Dooks <ben@simtec.co.uk> + * +*/ + +extern struct dentry *ext4_get_parent(struct dentry *child); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c new file mode 100644 index 000000000000..1e9578052cd3 --- /dev/null +++ b/fs/ext4/resize.c @@ -0,0 +1,1045 @@ +/* + * linux/fs/ext4/resize.c + * + * Support for resizing an ext4 filesystem while it is mounted. + * + * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> + * + * This could probably be made into a module, because it is not often in use. + */ + + +#define EXT4FS_DEBUG + +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/ext4_jbd2.h> + +#include <linux/errno.h> +#include <linux/slab.h> + + +#define outside(b, first, last) ((b) < (first) || (b) >= (last)) +#define inside(b, first, last) ((b) >= (first) && (b) < (last)) + +static int verify_group_input(struct super_block *sb, + struct ext4_new_group_data *input) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + ext4_fsblk_t start = ext4_blocks_count(es); + ext4_fsblk_t end = start + input->blocks_count; + unsigned group = input->group; + ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; + unsigned overhead = ext4_bg_has_super(sb, group) ? + (1 + ext4_bg_num_gdb(sb, group) + + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + ext4_fsblk_t metaend = start + overhead; + struct buffer_head *bh = NULL; + ext4_grpblk_t free_blocks_count, offset; + int err = -EINVAL; + + input->free_blocks_count = free_blocks_count = + input->blocks_count - 2 - overhead - sbi->s_itb_per_group; + + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " + "(%d free, %u reserved)\n", + ext4_bg_has_super(sb, input->group) ? "normal" : + "no-super", input->group, input->blocks_count, + free_blocks_count, input->reserved_blocks); + + ext4_get_group_no_and_offset(sb, start, NULL, &offset); + if (group != sbi->s_groups_count) + ext4_warning(sb, __FUNCTION__, + "Cannot add at group %u (only %lu groups)", + input->group, sbi->s_groups_count); + else if (offset != 0) + ext4_warning(sb, __FUNCTION__, "Last group not full"); + else if (input->reserved_blocks > input->blocks_count / 5) + ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", + input->reserved_blocks); + else if (free_blocks_count < 0) + ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", + input->blocks_count); + else if (!(bh = sb_bread(sb, end - 1))) + ext4_warning(sb, __FUNCTION__, + "Cannot read last block (%llu)", + end - 1); + else if (outside(input->block_bitmap, start, end)) + ext4_warning(sb, __FUNCTION__, + "Block bitmap not in group (block %llu)", + input->block_bitmap); + else if (outside(input->inode_bitmap, start, end)) + ext4_warning(sb, __FUNCTION__, + "Inode bitmap not in group (block %llu)", + input->inode_bitmap); + else if (outside(input->inode_table, start, end) || + outside(itend - 1, start, end)) + ext4_warning(sb, __FUNCTION__, + "Inode table not in group (blocks %llu-%llu)", + input->inode_table, itend - 1); + else if (input->inode_bitmap == input->block_bitmap) + ext4_warning(sb, __FUNCTION__, + "Block bitmap same as inode bitmap (%llu)", + input->block_bitmap); + else if (inside(input->block_bitmap, input->inode_table, itend)) + ext4_warning(sb, __FUNCTION__, + "Block bitmap (%llu) in inode table (%llu-%llu)", + input->block_bitmap, input->inode_table, itend-1); + else if (inside(input->inode_bitmap, input->inode_table, itend)) + ext4_warning(sb, __FUNCTION__, + "Inode bitmap (%llu) in inode table (%llu-%llu)", + input->inode_bitmap, input->inode_table, itend-1); + else if (inside(input->block_bitmap, start, metaend)) + ext4_warning(sb, __FUNCTION__, + "Block bitmap (%llu) in GDT table" + " (%llu-%llu)", + input->block_bitmap, start, metaend - 1); + else if (inside(input->inode_bitmap, start, metaend)) + ext4_warning(sb, __FUNCTION__, + "Inode bitmap (%llu) in GDT table" + " (%llu-%llu)", + input->inode_bitmap, start, metaend - 1); + else if (inside(input->inode_table, start, metaend) || + inside(itend - 1, start, metaend)) + ext4_warning(sb, __FUNCTION__, + "Inode table (%llu-%llu) overlaps" + "GDT table (%llu-%llu)", + input->inode_table, itend - 1, start, metaend - 1); + else + err = 0; + brelse(bh); + + return err; +} + +static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, + ext4_fsblk_t blk) +{ + struct buffer_head *bh; + int err; + + bh = sb_getblk(sb, blk); + if (!bh) + return ERR_PTR(-EIO); + if ((err = ext4_journal_get_write_access(handle, bh))) { + brelse(bh); + bh = ERR_PTR(err); + } else { + lock_buffer(bh); + memset(bh->b_data, 0, sb->s_blocksize); + set_buffer_uptodate(bh); + unlock_buffer(bh); + } + + return bh; +} + +/* + * To avoid calling the atomic setbit hundreds or thousands of times, we only + * need to use it within a single byte (to ensure we get endianness right). + * We can use memset for the rest of the bitmap as there are no other users. + */ +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +{ + int i; + + if (start_bit >= end_bit) + return; + + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) + ext4_set_bit(i, bitmap); + if (i < end_bit) + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); +} + +/* + * Set up the block and inode bitmaps, and the inode table for the new group. + * This doesn't need to be part of the main transaction, since we are only + * changing blocks outside the actual filesystem. We still do journaling to + * ensure the recovery is correct in case of a failure just after resize. + * If any part of this fails, we simply abort the resize. + */ +static int setup_new_group_blocks(struct super_block *sb, + struct ext4_new_group_data *input) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); + int reserved_gdb = ext4_bg_has_super(sb, input->group) ? + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; + unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); + struct buffer_head *bh; + handle_t *handle; + ext4_fsblk_t block; + ext4_grpblk_t bit; + int i; + int err = 0, err2; + + handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks + + 2 + sbi->s_itb_per_group); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + lock_super(sb); + if (input->group != sbi->s_groups_count) { + err = -EBUSY; + goto exit_journal; + } + + if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { + err = PTR_ERR(bh); + goto exit_journal; + } + + if (ext4_bg_has_super(sb, input->group)) { + ext4_debug("mark backup superblock %#04lx (+0)\n", start); + ext4_set_bit(0, bh->b_data); + } + + /* Copy all of the GDT blocks into the backup in this group */ + for (i = 0, bit = 1, block = start + 1; + i < gdblocks; i++, block++, bit++) { + struct buffer_head *gdb; + + ext4_debug("update backup group %#04lx (+%d)\n", block, bit); + + gdb = sb_getblk(sb, block); + if (!gdb) { + err = -EIO; + goto exit_bh; + } + if ((err = ext4_journal_get_write_access(handle, gdb))) { + brelse(gdb); + goto exit_bh; + } + lock_buffer(bh); + memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); + set_buffer_uptodate(gdb); + unlock_buffer(bh); + ext4_journal_dirty_metadata(handle, gdb); + ext4_set_bit(bit, bh->b_data); + brelse(gdb); + } + + /* Zero out all of the reserved backup group descriptor table blocks */ + for (i = 0, bit = gdblocks + 1, block = start + bit; + i < reserved_gdb; i++, block++, bit++) { + struct buffer_head *gdb; + + ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); + + if (IS_ERR(gdb = bclean(handle, sb, block))) { + err = PTR_ERR(bh); + goto exit_bh; + } + ext4_journal_dirty_metadata(handle, gdb); + ext4_set_bit(bit, bh->b_data); + brelse(gdb); + } + ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, + input->block_bitmap - start); + ext4_set_bit(input->block_bitmap - start, bh->b_data); + ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, + input->inode_bitmap - start); + ext4_set_bit(input->inode_bitmap - start, bh->b_data); + + /* Zero out all of the inode table blocks */ + for (i = 0, block = input->inode_table, bit = block - start; + i < sbi->s_itb_per_group; i++, bit++, block++) { + struct buffer_head *it; + + ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); + if (IS_ERR(it = bclean(handle, sb, block))) { + err = PTR_ERR(it); + goto exit_bh; + } + ext4_journal_dirty_metadata(handle, it); + brelse(it); + ext4_set_bit(bit, bh->b_data); + } + mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), + bh->b_data); + ext4_journal_dirty_metadata(handle, bh); + brelse(bh); + + /* Mark unused entries in inode bitmap used */ + ext4_debug("clear inode bitmap %#04x (+%ld)\n", + input->inode_bitmap, input->inode_bitmap - start); + if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { + err = PTR_ERR(bh); + goto exit_journal; + } + + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + bh->b_data); + ext4_journal_dirty_metadata(handle, bh); +exit_bh: + brelse(bh); + +exit_journal: + unlock_super(sb); + if ((err2 = ext4_journal_stop(handle)) && !err) + err = err2; + + return err; +} + + +/* + * Iterate through the groups which hold BACKUP superblock/GDT copies in an + * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before + * calling this for the first time. In a sparse filesystem it will be the + * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... + * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... + */ +static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, + unsigned *five, unsigned *seven) +{ + unsigned *min = three; + int mult = 3; + unsigned ret; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + ret = *min; + *min += 1; + return ret; + } + + if (*five < *min) { + min = five; + mult = 5; + } + if (*seven < *min) { + min = seven; + mult = 7; + } + + ret = *min; + *min *= mult; + + return ret; +} + +/* + * Check that all of the backup GDT blocks are held in the primary GDT block. + * It is assumed that they are stored in group order. Returns the number of + * groups in current filesystem that have BACKUPS, or -ve error code. + */ +static int verify_reserved_gdb(struct super_block *sb, + struct buffer_head *primary) +{ + const ext4_fsblk_t blk = primary->b_blocknr; + const unsigned long end = EXT4_SB(sb)->s_groups_count; + unsigned three = 1; + unsigned five = 5; + unsigned seven = 7; + unsigned grp; + __le32 *p = (__le32 *)primary->b_data; + int gdbackups = 0; + + while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { + if (le32_to_cpu(*p++) != + grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ + ext4_warning(sb, __FUNCTION__, + "reserved GDT %llu" + " missing grp %d (%llu)", + blk, grp, + grp * + (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + + blk); + return -EINVAL; + } + if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb)) + return -EFBIG; + } + + return gdbackups; +} + +/* + * Called when we need to bring a reserved group descriptor table block into + * use from the resize inode. The primary copy of the new GDT block currently + * is an indirect block (under the double indirect block in the resize inode). + * The new backup GDT blocks will be stored as leaf blocks in this indirect + * block, in group order. Even though we know all the block numbers we need, + * we check to ensure that the resize inode has actually reserved these blocks. + * + * Don't need to update the block bitmaps because the blocks are still in use. + * + * We get all of the error cases out of the way, so that we are sure to not + * fail once we start modifying the data on disk, because JBD has no rollback. + */ +static int add_new_gdb(handle_t *handle, struct inode *inode, + struct ext4_new_group_data *input, + struct buffer_head **primary) +{ + struct super_block *sb = inode->i_sb; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); + ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; + struct buffer_head **o_group_desc, **n_group_desc; + struct buffer_head *dind; + int gdbackups; + struct ext4_iloc iloc; + __le32 *data; + int err; + + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG + "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", + gdb_num); + + /* + * If we are not using the primary superblock/GDT copy don't resize, + * because the user tools have no way of handling this. Probably a + * bad time to do it anyways. + */ + if (EXT4_SB(sb)->s_sbh->b_blocknr != + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { + ext4_warning(sb, __FUNCTION__, + "won't resize using backup superblock at %llu", + (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); + return -EPERM; + } + + *primary = sb_bread(sb, gdblock); + if (!*primary) + return -EIO; + + if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { + err = gdbackups; + goto exit_bh; + } + + data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; + dind = sb_bread(sb, le32_to_cpu(*data)); + if (!dind) { + err = -EIO; + goto exit_bh; + } + + data = (__le32 *)dind->b_data; + if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { + ext4_warning(sb, __FUNCTION__, + "new group %u GDT block %llu not reserved", + input->group, gdblock); + err = -EINVAL; + goto exit_dind; + } + + if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) + goto exit_dind; + + if ((err = ext4_journal_get_write_access(handle, *primary))) + goto exit_sbh; + + if ((err = ext4_journal_get_write_access(handle, dind))) + goto exit_primary; + + /* ext4_reserve_inode_write() gets a reference on the iloc */ + if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) + goto exit_dindj; + + n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), + GFP_KERNEL); + if (!n_group_desc) { + err = -ENOMEM; + ext4_warning (sb, __FUNCTION__, + "not enough memory for %lu groups", gdb_num + 1); + goto exit_inode; + } + + /* + * Finally, we have all of the possible failures behind us... + * + * Remove new GDT block from inode double-indirect block and clear out + * the new GDT block for use (which also "frees" the backup GDT blocks + * from the reserved inode). We don't need to change the bitmaps for + * these blocks, because they are marked as in-use from being in the + * reserved inode, and will become GDT blocks (primary and backup). + */ + data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; + ext4_journal_dirty_metadata(handle, dind); + brelse(dind); + inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; + ext4_mark_iloc_dirty(handle, inode, &iloc); + memset((*primary)->b_data, 0, sb->s_blocksize); + ext4_journal_dirty_metadata(handle, *primary); + + o_group_desc = EXT4_SB(sb)->s_group_desc; + memcpy(n_group_desc, o_group_desc, + EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + n_group_desc[gdb_num] = *primary; + EXT4_SB(sb)->s_group_desc = n_group_desc; + EXT4_SB(sb)->s_gdb_count++; + kfree(o_group_desc); + + es->s_reserved_gdt_blocks = + cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); + ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + + return 0; + +exit_inode: + //ext4_journal_release_buffer(handle, iloc.bh); + brelse(iloc.bh); +exit_dindj: + //ext4_journal_release_buffer(handle, dind); +exit_primary: + //ext4_journal_release_buffer(handle, *primary); +exit_sbh: + //ext4_journal_release_buffer(handle, *primary); +exit_dind: + brelse(dind); +exit_bh: + brelse(*primary); + + ext4_debug("leaving with error %d\n", err); + return err; +} + +/* + * Called when we are adding a new group which has a backup copy of each of + * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. + * We need to add these reserved backup GDT blocks to the resize inode, so + * that they are kept for future resizing and not allocated to files. + * + * Each reserved backup GDT block will go into a different indirect block. + * The indirect blocks are actually the primary reserved GDT blocks, + * so we know in advance what their block numbers are. We only get the + * double-indirect block to verify it is pointing to the primary reserved + * GDT blocks so we don't overwrite a data block by accident. The reserved + * backup GDT blocks are stored in their reserved primary GDT block. + */ +static int reserve_backup_gdb(handle_t *handle, struct inode *inode, + struct ext4_new_group_data *input) +{ + struct super_block *sb = inode->i_sb; + int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); + struct buffer_head **primary; + struct buffer_head *dind; + struct ext4_iloc iloc; + ext4_fsblk_t blk; + __le32 *data, *end; + int gdbackups = 0; + int res, i; + int err; + + primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); + if (!primary) + return -ENOMEM; + + data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; + dind = sb_bread(sb, le32_to_cpu(*data)); + if (!dind) { + err = -EIO; + goto exit_free; + } + + blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; + end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); + + /* Get each reserved primary GDT block and verify it holds backups */ + for (res = 0; res < reserved_gdb; res++, blk++) { + if (le32_to_cpu(*data) != blk) { + ext4_warning(sb, __FUNCTION__, + "reserved block %llu" + " not at offset %ld", + blk, + (long)(data - (__le32 *)dind->b_data)); + err = -EINVAL; + goto exit_bh; + } + primary[res] = sb_bread(sb, blk); + if (!primary[res]) { + err = -EIO; + goto exit_bh; + } + if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { + brelse(primary[res]); + err = gdbackups; + goto exit_bh; + } + if (++data >= end) + data = (__le32 *)dind->b_data; + } + + for (i = 0; i < reserved_gdb; i++) { + if ((err = ext4_journal_get_write_access(handle, primary[i]))) { + /* + int j; + for (j = 0; j < i; j++) + ext4_journal_release_buffer(handle, primary[j]); + */ + goto exit_bh; + } + } + + if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) + goto exit_bh; + + /* + * Finally we can add each of the reserved backup GDT blocks from + * the new group to its reserved primary GDT block. + */ + blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); + for (i = 0; i < reserved_gdb; i++) { + int err2; + data = (__le32 *)primary[i]->b_data; + /* printk("reserving backup %lu[%u] = %lu\n", + primary[i]->b_blocknr, gdbackups, + blk + primary[i]->b_blocknr); */ + data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); + err2 = ext4_journal_dirty_metadata(handle, primary[i]); + if (!err) + err = err2; + } + inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; + ext4_mark_iloc_dirty(handle, inode, &iloc); + +exit_bh: + while (--res >= 0) + brelse(primary[res]); + brelse(dind); + +exit_free: + kfree(primary); + + return err; +} + +/* + * Update the backup copies of the ext4 metadata. These don't need to be part + * of the main resize transaction, because e2fsck will re-write them if there + * is a problem (basically only OOM will cause a problem). However, we + * _should_ update the backups if possible, in case the primary gets trashed + * for some reason and we need to run e2fsck from a backup superblock. The + * important part is that the new block and inode counts are in the backup + * superblocks, and the location of the new group metadata in the GDT backups. + * + * We do not need lock_super() for this, because these blocks are not + * otherwise touched by the filesystem code when it is mounted. We don't + * need to worry about last changing from sbi->s_groups_count, because the + * worst that can happen is that we do not copy the full number of backups + * at this time. The resize which changed s_groups_count will backup again. + */ +static void update_backups(struct super_block *sb, + int blk_off, char *data, int size) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + const unsigned long last = sbi->s_groups_count; + const int bpg = EXT4_BLOCKS_PER_GROUP(sb); + unsigned three = 1; + unsigned five = 5; + unsigned seven = 7; + unsigned group; + int rest = sb->s_blocksize - size; + handle_t *handle; + int err = 0, err2; + + handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); + if (IS_ERR(handle)) { + group = 1; + err = PTR_ERR(handle); + goto exit_err; + } + + while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { + struct buffer_head *bh; + + /* Out of journal space, and can't get more - abort - so sad */ + if (handle->h_buffer_credits == 0 && + ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && + (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) + break; + + bh = sb_getblk(sb, group * bpg + blk_off); + if (!bh) { + err = -EIO; + break; + } + ext4_debug("update metadata backup %#04lx\n", + (unsigned long)bh->b_blocknr); + if ((err = ext4_journal_get_write_access(handle, bh))) + break; + lock_buffer(bh); + memcpy(bh->b_data, data, size); + if (rest) + memset(bh->b_data + size, 0, rest); + set_buffer_uptodate(bh); + unlock_buffer(bh); + ext4_journal_dirty_metadata(handle, bh); + brelse(bh); + } + if ((err2 = ext4_journal_stop(handle)) && !err) + err = err2; + + /* + * Ugh! Need to have e2fsck write the backup copies. It is too + * late to revert the resize, we shouldn't fail just because of + * the backup copies (they are only needed in case of corruption). + * + * However, if we got here we have a journal problem too, so we + * can't really start a transaction to mark the superblock. + * Chicken out and just set the flag on the hope it will be written + * to disk, and if not - we will simply wait until next fsck. + */ +exit_err: + if (err) { + ext4_warning(sb, __FUNCTION__, + "can't update backup for group %d (err %d), " + "forcing fsck on next reboot", group, err); + sbi->s_mount_state &= ~EXT4_VALID_FS; + sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); + mark_buffer_dirty(sbi->s_sbh); + } +} + +/* Add group descriptor data to an existing or new group descriptor block. + * Ensure we handle all possible error conditions _before_ we start modifying + * the filesystem, because we cannot abort the transaction and not have it + * write the data to disk. + * + * If we are on a GDT block boundary, we need to get the reserved GDT block. + * Otherwise, we may need to add backup GDT blocks for a sparse group. + * + * We only need to hold the superblock lock while we are actually adding + * in the new group's counts to the superblock. Prior to that we have + * not really "added" the group at all. We re-check that we are still + * adding in the last group in case things have changed since verifying. + */ +int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int reserved_gdb = ext4_bg_has_super(sb, input->group) ? + le16_to_cpu(es->s_reserved_gdt_blocks) : 0; + struct buffer_head *primary = NULL; + struct ext4_group_desc *gdp; + struct inode *inode = NULL; + handle_t *handle; + int gdb_off, gdb_num; + int err, err2; + + gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); + gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); + + if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + ext4_warning(sb, __FUNCTION__, + "Can't resize non-sparse filesystem further"); + return -EPERM; + } + + if (ext4_blocks_count(es) + input->blocks_count < + ext4_blocks_count(es)) { + ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); + return -EINVAL; + } + + if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < + le32_to_cpu(es->s_inodes_count)) { + ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); + return -EINVAL; + } + + if (reserved_gdb || gdb_off == 0) { + if (!EXT4_HAS_COMPAT_FEATURE(sb, + EXT4_FEATURE_COMPAT_RESIZE_INODE)){ + ext4_warning(sb, __FUNCTION__, + "No reserved GDT blocks, can't resize"); + return -EPERM; + } + inode = iget(sb, EXT4_RESIZE_INO); + if (!inode || is_bad_inode(inode)) { + ext4_warning(sb, __FUNCTION__, + "Error opening resize inode"); + iput(inode); + return -ENOENT; + } + } + + if ((err = verify_group_input(sb, input))) + goto exit_put; + + if ((err = setup_new_group_blocks(sb, input))) + goto exit_put; + + /* + * We will always be modifying at least the superblock and a GDT + * block. If we are adding a group past the last current GDT block, + * we will also modify the inode and the dindirect block. If we + * are adding a group with superblock/GDT backups we will also + * modify each of the reserved GDT dindirect blocks. + */ + handle = ext4_journal_start_sb(sb, + ext4_bg_has_super(sb, input->group) ? + 3 + reserved_gdb : 4); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto exit_put; + } + + lock_super(sb); + if (input->group != sbi->s_groups_count) { + ext4_warning(sb, __FUNCTION__, + "multiple resizers run on filesystem!"); + err = -EBUSY; + goto exit_journal; + } + + if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) + goto exit_journal; + + /* + * We will only either add reserved group blocks to a backup group + * or remove reserved blocks for the first group in a new group block. + * Doing both would be mean more complex code, and sane people don't + * use non-sparse filesystems anymore. This is already checked above. + */ + if (gdb_off) { + primary = sbi->s_group_desc[gdb_num]; + if ((err = ext4_journal_get_write_access(handle, primary))) + goto exit_journal; + + if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && + (err = reserve_backup_gdb(handle, inode, input))) + goto exit_journal; + } else if ((err = add_new_gdb(handle, inode, input, &primary))) + goto exit_journal; + + /* + * OK, now we've set up the new group. Time to make it active. + * + * Current kernels don't lock all allocations via lock_super(), + * so we have to be safe wrt. concurrent accesses the group + * data. So we need to be careful to set all of the relevant + * group descriptor data etc. *before* we enable the group. + * + * The key field here is sbi->s_groups_count: as long as + * that retains its old value, nobody is going to access the new + * group. + * + * So first we update all the descriptor metadata for the new + * group; then we update the total disk blocks count; then we + * update the groups count to enable the group; then finally we + * update the free space counts so that the system can start + * using the new disk blocks. + */ + + /* Update group descriptor block for new group */ + gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; + + ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ + ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ + ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ + gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); + gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + + /* + * Make the new blocks and inodes valid next. We do this before + * increasing the group count so that once the group is enabled, + * all of its blocks and inodes are already valid. + * + * We always allocate group-by-group, then block-by-block or + * inode-by-inode within a group, so enabling these + * blocks/inodes before the group is live won't actually let us + * allocate the new space yet. + */ + ext4_blocks_count_set(es, ext4_blocks_count(es) + + input->blocks_count); + es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + + EXT4_INODES_PER_GROUP(sb)); + + /* + * We need to protect s_groups_count against other CPUs seeing + * inconsistent state in the superblock. + * + * The precise rules we use are: + * + * * Writers of s_groups_count *must* hold lock_super + * AND + * * Writers must perform a smp_wmb() after updating all dependent + * data and before modifying the groups count + * + * * Readers must hold lock_super() over the access + * OR + * * Readers must perform an smp_rmb() after reading the groups count + * and before reading any dependent data. + * + * NB. These rules can be relaxed when checking the group count + * while freeing data, as we can only allocate from a block + * group after serialising against the group count, and we can + * only then free after serialising in turn against that + * allocation. + */ + smp_wmb(); + + /* Update the global fs size fields */ + sbi->s_groups_count++; + + ext4_journal_dirty_metadata(handle, primary); + + /* Update the reserved block counts only once the new group is + * active. */ + ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + + input->reserved_blocks); + + /* Update the free space counts */ + percpu_counter_mod(&sbi->s_freeblocks_counter, + input->free_blocks_count); + percpu_counter_mod(&sbi->s_freeinodes_counter, + EXT4_INODES_PER_GROUP(sb)); + + ext4_journal_dirty_metadata(handle, sbi->s_sbh); + sb->s_dirt = 1; + +exit_journal: + unlock_super(sb); + if ((err2 = ext4_journal_stop(handle)) && !err) + err = err2; + if (!err) { + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext4_super_block)); + update_backups(sb, primary->b_blocknr, primary->b_data, + primary->b_size); + } +exit_put: + iput(inode); + return err; +} /* ext4_group_add */ + +/* Extend the filesystem to the new number of blocks specified. This entry + * point is only used to extend the current filesystem to the end of the last + * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" + * for emergencies (because it has no dependencies on reserved blocks). + * + * If we _really_ wanted, we could use default values to call ext4_group_add() + * allow the "remount" trick to work for arbitrary resizing, assuming enough + * GDT blocks are reserved to grow to the desired size. + */ +int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, + ext4_fsblk_t n_blocks_count) +{ + ext4_fsblk_t o_blocks_count; + unsigned long o_groups_count; + ext4_grpblk_t last; + ext4_grpblk_t add; + struct buffer_head * bh; + handle_t *handle; + int err; + unsigned long freed_blocks; + + /* We don't need to worry about locking wrt other resizers just + * yet: we're going to revalidate es->s_blocks_count after + * taking lock_super() below. */ + o_blocks_count = ext4_blocks_count(es); + o_groups_count = EXT4_SB(sb)->s_groups_count; + + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", + o_blocks_count, n_blocks_count); + + if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) + return 0; + + if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + printk(KERN_ERR "EXT4-fs: filesystem on %s:" + " too large to resize to %llu blocks safely\n", + sb->s_id, n_blocks_count); + if (sizeof(sector_t) < 8) + ext4_warning(sb, __FUNCTION__, + "CONFIG_LBD not enabled\n"); + return -EINVAL; + } + + if (n_blocks_count < o_blocks_count) { + ext4_warning(sb, __FUNCTION__, + "can't shrink FS - resize aborted"); + return -EBUSY; + } + + /* Handle the remaining blocks in the last group only. */ + ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); + + if (last == 0) { + ext4_warning(sb, __FUNCTION__, + "need to use ext2online to resize further"); + return -EPERM; + } + + add = EXT4_BLOCKS_PER_GROUP(sb) - last; + + if (o_blocks_count + add < o_blocks_count) { + ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); + return -EINVAL; + } + + if (o_blocks_count + add > n_blocks_count) + add = n_blocks_count - o_blocks_count; + + if (o_blocks_count + add < n_blocks_count) + ext4_warning(sb, __FUNCTION__, + "will only finish group (%llu" + " blocks, %u new)", + o_blocks_count + add, add); + + /* See if the device is actually as big as what was requested */ + bh = sb_bread(sb, o_blocks_count + add -1); + if (!bh) { + ext4_warning(sb, __FUNCTION__, + "can't read last block, resize aborted"); + return -ENOSPC; + } + brelse(bh); + + /* We will update the superblock, one block bitmap, and + * one group descriptor via ext4_free_blocks(). + */ + handle = ext4_journal_start_sb(sb, 3); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); + goto exit_put; + } + + lock_super(sb); + if (o_blocks_count != ext4_blocks_count(es)) { + ext4_warning(sb, __FUNCTION__, + "multiple resizers run on filesystem!"); + unlock_super(sb); + err = -EBUSY; + goto exit_put; + } + + if ((err = ext4_journal_get_write_access(handle, + EXT4_SB(sb)->s_sbh))) { + ext4_warning(sb, __FUNCTION__, + "error %d on journal write access", err); + unlock_super(sb); + ext4_journal_stop(handle); + goto exit_put; + } + ext4_blocks_count_set(es, o_blocks_count + add); + ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + sb->s_dirt = 1; + unlock_super(sb); + ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count, + o_blocks_count + add); + ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); + ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, + o_blocks_count + add); + if ((err = ext4_journal_stop(handle))) + goto exit_put; + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", + ext4_blocks_count(es)); + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext4_super_block)); +exit_put: + return err; +} /* ext4_group_extend */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c new file mode 100644 index 000000000000..b4b022aa2bc2 --- /dev/null +++ b/fs/ext4/super.c @@ -0,0 +1,2829 @@ +/* + * linux/fs/ext4/super.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd2.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/parser.h> +#include <linux/smp_lock.h> +#include <linux/buffer_head.h> +#include <linux/vfs.h> +#include <linux/random.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/quotaops.h> +#include <linux/seq_file.h> + +#include <asm/uaccess.h> + +#include "xattr.h" +#include "acl.h" +#include "namei.h" + +static int ext4_load_journal(struct super_block *, struct ext4_super_block *, + unsigned long journal_devnum); +static int ext4_create_journal(struct super_block *, struct ext4_super_block *, + unsigned int); +static void ext4_commit_super (struct super_block * sb, + struct ext4_super_block * es, + int sync); +static void ext4_mark_recovery_complete(struct super_block * sb, + struct ext4_super_block * es); +static void ext4_clear_journal_err(struct super_block * sb, + struct ext4_super_block * es); +static int ext4_sync_fs(struct super_block *sb, int wait); +static const char *ext4_decode_error(struct super_block * sb, int errno, + char nbuf[16]); +static int ext4_remount (struct super_block * sb, int * flags, char * data); +static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); +static void ext4_unlockfs(struct super_block *sb); +static void ext4_write_super (struct super_block * sb); +static void ext4_write_super_lockfs(struct super_block *sb); + + +ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le32_to_cpu(bg->bg_block_bitmap) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); +} + +ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le32_to_cpu(bg->bg_inode_bitmap) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); +} + +ext4_fsblk_t ext4_inode_table(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le32_to_cpu(bg->bg_inode_table) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); +} + +void ext4_block_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk) +{ + bg->bg_block_bitmap = cpu_to_le32((u32)blk); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); +} + +void ext4_inode_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk) +{ + bg->bg_inode_bitmap = cpu_to_le32((u32)blk); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); +} + +void ext4_inode_table_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk) +{ + bg->bg_inode_table = cpu_to_le32((u32)blk); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); +} + +/* + * Wrappers for jbd2_journal_start/end. + * + * The only special thing we need to do here is to make sure that all + * journal_end calls result in the superblock being marked dirty, so + * that sync() will call the filesystem's write_super callback if + * appropriate. + */ +handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) +{ + journal_t *journal; + + if (sb->s_flags & MS_RDONLY) + return ERR_PTR(-EROFS); + + /* Special case here: if the journal has aborted behind our + * backs (eg. EIO in the commit thread), then we still need to + * take the FS itself readonly cleanly. */ + journal = EXT4_SB(sb)->s_journal; + if (is_journal_aborted(journal)) { + ext4_abort(sb, __FUNCTION__, + "Detected aborted journal"); + return ERR_PTR(-EROFS); + } + + return jbd2_journal_start(journal, nblocks); +} + +/* + * The only special thing we need to do here is to make sure that all + * jbd2_journal_stop calls result in the superblock being marked dirty, so + * that sync() will call the filesystem's write_super callback if + * appropriate. + */ +int __ext4_journal_stop(const char *where, handle_t *handle) +{ + struct super_block *sb; + int err; + int rc; + + sb = handle->h_transaction->t_journal->j_private; + err = handle->h_err; + rc = jbd2_journal_stop(handle); + + if (!err) + err = rc; + if (err) + __ext4_std_error(sb, where, err); + return err; +} + +void ext4_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err) +{ + char nbuf[16]; + const char *errstr = ext4_decode_error(NULL, err, nbuf); + + if (bh) + BUFFER_TRACE(bh, "abort"); + + if (!handle->h_err) + handle->h_err = err; + + if (is_handle_aborted(handle)) + return; + + printk(KERN_ERR "%s: aborting transaction: %s in %s\n", + caller, errstr, err_fn); + + jbd2_journal_abort_handle(handle); +} + +/* Deal with the reporting of failure conditions on a filesystem such as + * inconsistencies detected or read IO failures. + * + * On ext2, we can store the error state of the filesystem in the + * superblock. That is not possible on ext4, because we may have other + * write ordering constraints on the superblock which prevent us from + * writing it out straight away; and given that the journal is about to + * be aborted, we can't rely on the current, or future, transactions to + * write out the superblock safely. + * + * We'll just use the jbd2_journal_abort() error code to record an error in + * the journal instead. On recovery, the journal will compain about + * that error until we've noted it down and cleared it. + */ + +static void ext4_handle_error(struct super_block *sb) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + + if (sb->s_flags & MS_RDONLY) + return; + + if (!test_opt (sb, ERRORS_CONT)) { + journal_t *journal = EXT4_SB(sb)->s_journal; + + EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; + if (journal) + jbd2_journal_abort(journal, -EIO); + } + if (test_opt (sb, ERRORS_RO)) { + printk (KERN_CRIT "Remounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + } + ext4_commit_super(sb, es, 1); + if (test_opt(sb, ERRORS_PANIC)) + panic("EXT4-fs (device %s): panic forced after error\n", + sb->s_id); +} + +void ext4_error (struct super_block * sb, const char * function, + const char * fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + ext4_handle_error(sb); +} + +static const char *ext4_decode_error(struct super_block * sb, int errno, + char nbuf[16]) +{ + char *errstr = NULL; + + switch (errno) { + case -EIO: + errstr = "IO failure"; + break; + case -ENOMEM: + errstr = "Out of memory"; + break; + case -EROFS: + if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) + errstr = "Journal has aborted"; + else + errstr = "Readonly filesystem"; + break; + default: + /* If the caller passed in an extra buffer for unknown + * errors, textualise them now. Else we just return + * NULL. */ + if (nbuf) { + /* Check for truncated error codes... */ + if (snprintf(nbuf, 16, "error %d", -errno) >= 0) + errstr = nbuf; + } + break; + } + + return errstr; +} + +/* __ext4_std_error decodes expected errors from journaling functions + * automatically and invokes the appropriate error response. */ + +void __ext4_std_error (struct super_block * sb, const char * function, + int errno) +{ + char nbuf[16]; + const char *errstr; + + /* Special case: if the error is EROFS, and we're not already + * inside a transaction, then there's really no point in logging + * an error. */ + if (errno == -EROFS && journal_current_handle() == NULL && + (sb->s_flags & MS_RDONLY)) + return; + + errstr = ext4_decode_error(sb, errno, nbuf); + printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", + sb->s_id, function, errstr); + + ext4_handle_error(sb); +} + +/* + * ext4_abort is a much stronger failure handler than ext4_error. The + * abort function may be used to deal with unrecoverable failures such + * as journal IO errors or ENOMEM at a critical moment in log management. + * + * We unconditionally force the filesystem into an ABORT|READONLY state, + * unless the error response on the fs has been set to panic in which + * case we take the easy way out and panic immediately. + */ + +void ext4_abort (struct super_block * sb, const char * function, + const char * fmt, ...) +{ + va_list args; + + printk (KERN_CRIT "ext4_abort called.\n"); + + va_start(args, fmt); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + if (test_opt(sb, ERRORS_PANIC)) + panic("EXT4-fs panic from previous error\n"); + + if (sb->s_flags & MS_RDONLY) + return; + + printk(KERN_CRIT "Remounting filesystem read-only\n"); + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + sb->s_flags |= MS_RDONLY; + EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; + jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); +} + +void ext4_warning (struct super_block * sb, const char * function, + const char * fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", + sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); +} + +void ext4_update_dynamic_rev(struct super_block *sb) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) + return; + + ext4_warning(sb, __FUNCTION__, + "updating to rev %d because of new feature flag, " + "running e2fsck is recommended", + EXT4_DYNAMIC_REV); + + es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); + es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); + es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); + /* leave es->s_feature_*compat flags alone */ + /* es->s_uuid will be set by e2fsck if empty */ + + /* + * The rest of the superblock fields should be zero, and if not it + * means they are likely already in use, so leave them alone. We + * can leave it up to e2fsck to clean up any inconsistencies there. + */ +} + +/* + * Open the external journal device + */ +static struct block_device *ext4_blkdev_get(dev_t dev) +{ + struct block_device *bdev; + char b[BDEVNAME_SIZE]; + + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + if (IS_ERR(bdev)) + goto fail; + return bdev; + +fail: + printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", + __bdevname(dev, b), PTR_ERR(bdev)); + return NULL; +} + +/* + * Release the journal device + */ +static int ext4_blkdev_put(struct block_device *bdev) +{ + bd_release(bdev); + return blkdev_put(bdev); +} + +static int ext4_blkdev_remove(struct ext4_sb_info *sbi) +{ + struct block_device *bdev; + int ret = -ENODEV; + + bdev = sbi->journal_bdev; + if (bdev) { + ret = ext4_blkdev_put(bdev); + sbi->journal_bdev = NULL; + } + return ret; +} + +static inline struct inode *orphan_list_entry(struct list_head *l) +{ + return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; +} + +static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) +{ + struct list_head *l; + + printk(KERN_ERR "sb orphan head is %d\n", + le32_to_cpu(sbi->s_es->s_last_orphan)); + + printk(KERN_ERR "sb_info orphan list:\n"); + list_for_each(l, &sbi->s_orphan) { + struct inode *inode = orphan_list_entry(l); + printk(KERN_ERR " " + "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", + inode->i_sb->s_id, inode->i_ino, inode, + inode->i_mode, inode->i_nlink, + NEXT_ORPHAN(inode)); + } +} + +static void ext4_put_super (struct super_block * sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int i; + + ext4_ext_release(sb); + ext4_xattr_put_super(sb); + jbd2_journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + es->s_state = cpu_to_le16(sbi->s_mount_state); + BUFFER_TRACE(sbi->s_sbh, "marking dirty"); + mark_buffer_dirty(sbi->s_sbh); + ext4_commit_super(sb, es, 1); + } + + for (i = 0; i < sbi->s_gdb_count; i++) + brelse(sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); + brelse(sbi->s_sbh); +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif + + /* Debugging code just in case the in-memory inode orphan list + * isn't empty. The on-disk one can be non-empty if we've + * detected an error and taken the fs readonly, but the + * in-memory list had better be clean by this point. */ + if (!list_empty(&sbi->s_orphan)) + dump_orphan_list(sb, sbi); + J_ASSERT(list_empty(&sbi->s_orphan)); + + invalidate_bdev(sb->s_bdev, 0); + if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { + /* + * Invalidate the journal device's buffers. We don't want them + * floating about in memory - the physical journal device may + * hotswapped, and it breaks the `ro-after' testing code. + */ + sync_blockdev(sbi->journal_bdev); + invalidate_bdev(sbi->journal_bdev, 0); + ext4_blkdev_remove(sbi); + } + sb->s_fs_info = NULL; + kfree(sbi); + return; +} + +static kmem_cache_t *ext4_inode_cachep; + +/* + * Called inside transaction, so use GFP_NOFS + */ +static struct inode *ext4_alloc_inode(struct super_block *sb) +{ + struct ext4_inode_info *ei; + + ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS); + if (!ei) + return NULL; +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + ei->i_acl = EXT4_ACL_NOT_CACHED; + ei->i_default_acl = EXT4_ACL_NOT_CACHED; +#endif + ei->i_block_alloc_info = NULL; + ei->vfs_inode.i_version = 1; + memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); + return &ei->vfs_inode; +} + +static void ext4_destroy_inode(struct inode *inode) +{ + kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); +} + +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ + struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + INIT_LIST_HEAD(&ei->i_orphan); +#ifdef CONFIG_EXT4DEV_FS_XATTR + init_rwsem(&ei->xattr_sem); +#endif + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); + } +} + +static int init_inodecache(void) +{ + ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", + sizeof(struct ext4_inode_info), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + if (ext4_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(ext4_inode_cachep); +} + +static void ext4_clear_inode(struct inode *inode) +{ + struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + if (EXT4_I(inode)->i_acl && + EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { + posix_acl_release(EXT4_I(inode)->i_acl); + EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; + } + if (EXT4_I(inode)->i_default_acl && + EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { + posix_acl_release(EXT4_I(inode)->i_default_acl); + EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; + } +#endif + ext4_discard_reservation(inode); + EXT4_I(inode)->i_block_alloc_info = NULL; + if (unlikely(rsv)) + kfree(rsv); +} + +static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) +{ +#if defined(CONFIG_QUOTA) + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (sbi->s_jquota_fmt) + seq_printf(seq, ",jqfmt=%s", + (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); + + if (sbi->s_qf_names[USRQUOTA]) + seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + + if (sbi->s_qf_names[GRPQUOTA]) + seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + + if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif +} + +static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct super_block *sb = vfs->mnt_sb; + + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + seq_puts(seq, ",data=journal"); + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + seq_puts(seq, ",data=ordered"); + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + seq_puts(seq, ",data=writeback"); + + ext4_show_quota_options(seq, sb); + + return 0; +} + + +static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) +{ + __u32 *objp = vobjp; + unsigned long ino = objp[0]; + __u32 generation = objp[1]; + struct inode *inode; + struct dentry *result; + + if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) + return ERR_PTR(-ESTALE); + if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) + return ERR_PTR(-ESTALE); + + /* iget isn't really right if the inode is currently unallocated!! + * + * ext4_read_inode will return a bad_inode if the inode had been + * deleted, so we should be safe. + * + * Currently we don't know the generation for parent directory, so + * a generation of 0 means "accept any" + */ + inode = iget(sb, ino); + if (inode == NULL) + return ERR_PTR(-ENOMEM); + if (is_bad_inode(inode) || + (generation && inode->i_generation != generation)) { + iput(inode); + return ERR_PTR(-ESTALE); + } + /* now to find a dentry. + * If possible, get a well-connected one + */ + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +#ifdef CONFIG_QUOTA +#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") +#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) + +static int ext4_dquot_initialize(struct inode *inode, int type); +static int ext4_dquot_drop(struct inode *inode); +static int ext4_write_dquot(struct dquot *dquot); +static int ext4_acquire_dquot(struct dquot *dquot); +static int ext4_release_dquot(struct dquot *dquot); +static int ext4_mark_dquot_dirty(struct dquot *dquot); +static int ext4_write_info(struct super_block *sb, int type); +static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path); +static int ext4_quota_on_mount(struct super_block *sb, int type); +static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off); +static ssize_t ext4_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off); + +static struct dquot_operations ext4_quota_operations = { + .initialize = ext4_dquot_initialize, + .drop = ext4_dquot_drop, + .alloc_space = dquot_alloc_space, + .alloc_inode = dquot_alloc_inode, + .free_space = dquot_free_space, + .free_inode = dquot_free_inode, + .transfer = dquot_transfer, + .write_dquot = ext4_write_dquot, + .acquire_dquot = ext4_acquire_dquot, + .release_dquot = ext4_release_dquot, + .mark_dirty = ext4_mark_dquot_dirty, + .write_info = ext4_write_info +}; + +static struct quotactl_ops ext4_qctl_operations = { + .quota_on = ext4_quota_on, + .quota_off = vfs_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, + .get_dqblk = vfs_get_dqblk, + .set_dqblk = vfs_set_dqblk +}; +#endif + +static struct super_operations ext4_sops = { + .alloc_inode = ext4_alloc_inode, + .destroy_inode = ext4_destroy_inode, + .read_inode = ext4_read_inode, + .write_inode = ext4_write_inode, + .dirty_inode = ext4_dirty_inode, + .delete_inode = ext4_delete_inode, + .put_super = ext4_put_super, + .write_super = ext4_write_super, + .sync_fs = ext4_sync_fs, + .write_super_lockfs = ext4_write_super_lockfs, + .unlockfs = ext4_unlockfs, + .statfs = ext4_statfs, + .remount_fs = ext4_remount, + .clear_inode = ext4_clear_inode, + .show_options = ext4_show_options, +#ifdef CONFIG_QUOTA + .quota_read = ext4_quota_read, + .quota_write = ext4_quota_write, +#endif +}; + +static struct export_operations ext4_export_ops = { + .get_parent = ext4_get_parent, + .get_dentry = ext4_get_dentry, +}; + +enum { + Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, + Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, + Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, + Opt_grpquota, Opt_extents, +}; + +static match_table_t tokens = { + {Opt_bsd_df, "bsddf"}, + {Opt_minix_df, "minixdf"}, + {Opt_grpid, "grpid"}, + {Opt_grpid, "bsdgroups"}, + {Opt_nogrpid, "nogrpid"}, + {Opt_nogrpid, "sysvgroups"}, + {Opt_resgid, "resgid=%u"}, + {Opt_resuid, "resuid=%u"}, + {Opt_sb, "sb=%u"}, + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, + {Opt_nouid32, "nouid32"}, + {Opt_nocheck, "nocheck"}, + {Opt_nocheck, "check=none"}, + {Opt_debug, "debug"}, + {Opt_oldalloc, "oldalloc"}, + {Opt_orlov, "orlov"}, + {Opt_user_xattr, "user_xattr"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_reservation, "reservation"}, + {Opt_noreservation, "noreservation"}, + {Opt_noload, "noload"}, + {Opt_nobh, "nobh"}, + {Opt_bh, "bh"}, + {Opt_commit, "commit=%u"}, + {Opt_journal_update, "journal=update"}, + {Opt_journal_inum, "journal=%u"}, + {Opt_journal_dev, "journal_dev=%u"}, + {Opt_abort, "abort"}, + {Opt_data_journal, "data=journal"}, + {Opt_data_ordered, "data=ordered"}, + {Opt_data_writeback, "data=writeback"}, + {Opt_offusrjquota, "usrjquota="}, + {Opt_usrjquota, "usrjquota=%s"}, + {Opt_offgrpjquota, "grpjquota="}, + {Opt_grpjquota, "grpjquota=%s"}, + {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, + {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, + {Opt_grpquota, "grpquota"}, + {Opt_noquota, "noquota"}, + {Opt_quota, "quota"}, + {Opt_usrquota, "usrquota"}, + {Opt_barrier, "barrier=%u"}, + {Opt_extents, "extents"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, +}; + +static ext4_fsblk_t get_sb_block(void **data) +{ + ext4_fsblk_t sb_block; + char *options = (char *) *data; + + if (!options || strncmp(options, "sb=", 3) != 0) + return 1; /* Default location */ + options += 3; + /*todo: use simple_strtoll with >32bit ext4 */ + sb_block = simple_strtoul(options, &options, 0); + if (*options && *options != ',') { + printk("EXT4-fs: Invalid sb specification: %s\n", + (char *) *data); + return 1; + } + if (*options == ',') + options++; + *data = (void *) options; + return sb_block; +} + +static int parse_options (char *options, struct super_block *sb, + unsigned int *inum, unsigned long *journal_devnum, + ext4_fsblk_t *n_blocks_count, int is_remount) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + char * p; + substring_t args[MAX_OPT_ARGS]; + int data_opt = 0; + int option; +#ifdef CONFIG_QUOTA + int qtype; + char *qname; +#endif + + if (!options) + return 1; + + while ((p = strsep (&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_bsd_df: + clear_opt (sbi->s_mount_opt, MINIX_DF); + break; + case Opt_minix_df: + set_opt (sbi->s_mount_opt, MINIX_DF); + break; + case Opt_grpid: + set_opt (sbi->s_mount_opt, GRPID); + break; + case Opt_nogrpid: + clear_opt (sbi->s_mount_opt, GRPID); + break; + case Opt_resuid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_resuid = option; + break; + case Opt_resgid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_resgid = option; + break; + case Opt_sb: + /* handled by get_sb_block() instead of here */ + /* *sb_block = match_int(&args[0]); */ + break; + case Opt_err_panic: + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_RO); + set_opt (sbi->s_mount_opt, ERRORS_PANIC); + break; + case Opt_err_ro: + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_RO); + break; + case Opt_err_cont: + clear_opt (sbi->s_mount_opt, ERRORS_RO); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_CONT); + break; + case Opt_nouid32: + set_opt (sbi->s_mount_opt, NO_UID32); + break; + case Opt_nocheck: + clear_opt (sbi->s_mount_opt, CHECK); + break; + case Opt_debug: + set_opt (sbi->s_mount_opt, DEBUG); + break; + case Opt_oldalloc: + set_opt (sbi->s_mount_opt, OLDALLOC); + break; + case Opt_orlov: + clear_opt (sbi->s_mount_opt, OLDALLOC); + break; +#ifdef CONFIG_EXT4DEV_FS_XATTR + case Opt_user_xattr: + set_opt (sbi->s_mount_opt, XATTR_USER); + break; + case Opt_nouser_xattr: + clear_opt (sbi->s_mount_opt, XATTR_USER); + break; +#else + case Opt_user_xattr: + case Opt_nouser_xattr: + printk("EXT4 (no)user_xattr options not supported\n"); + break; +#endif +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + case Opt_acl: + set_opt(sbi->s_mount_opt, POSIX_ACL); + break; + case Opt_noacl: + clear_opt(sbi->s_mount_opt, POSIX_ACL); + break; +#else + case Opt_acl: + case Opt_noacl: + printk("EXT4 (no)acl options not supported\n"); + break; +#endif + case Opt_reservation: + set_opt(sbi->s_mount_opt, RESERVATION); + break; + case Opt_noreservation: + clear_opt(sbi->s_mount_opt, RESERVATION); + break; + case Opt_journal_update: + /* @@@ FIXME */ + /* Eventually we will want to be able to create + a journal file here. For now, only allow the + user to specify an existing inode to be the + journal file. */ + if (is_remount) { + printk(KERN_ERR "EXT4-fs: cannot specify " + "journal on remount\n"); + return 0; + } + set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); + break; + case Opt_journal_inum: + if (is_remount) { + printk(KERN_ERR "EXT4-fs: cannot specify " + "journal on remount\n"); + return 0; + } + if (match_int(&args[0], &option)) + return 0; + *inum = option; + break; + case Opt_journal_dev: + if (is_remount) { + printk(KERN_ERR "EXT4-fs: cannot specify " + "journal on remount\n"); + return 0; + } + if (match_int(&args[0], &option)) + return 0; + *journal_devnum = option; + break; + case Opt_noload: + set_opt (sbi->s_mount_opt, NOLOAD); + break; + case Opt_commit: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + if (option == 0) + option = JBD_DEFAULT_MAX_COMMIT_AGE; + sbi->s_commit_interval = HZ * option; + break; + case Opt_data_journal: + data_opt = EXT4_MOUNT_JOURNAL_DATA; + goto datacheck; + case Opt_data_ordered: + data_opt = EXT4_MOUNT_ORDERED_DATA; + goto datacheck; + case Opt_data_writeback: + data_opt = EXT4_MOUNT_WRITEBACK_DATA; + datacheck: + if (is_remount) { + if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) + != data_opt) { + printk(KERN_ERR + "EXT4-fs: cannot change data " + "mode on remount\n"); + return 0; + } + } else { + sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; + sbi->s_mount_opt |= data_opt; + } + break; +#ifdef CONFIG_QUOTA + case Opt_usrjquota: + qtype = USRQUOTA; + goto set_qf_name; + case Opt_grpjquota: + qtype = GRPQUOTA; +set_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR + "EXT4-fs: Cannot change journalled " + "quota options when quota turned on.\n"); + return 0; + } + qname = match_strdup(&args[0]); + if (!qname) { + printk(KERN_ERR + "EXT4-fs: not enough memory for " + "storing quotafile name.\n"); + return 0; + } + if (sbi->s_qf_names[qtype] && + strcmp(sbi->s_qf_names[qtype], qname)) { + printk(KERN_ERR + "EXT4-fs: %s quota file already " + "specified.\n", QTYPE2NAME(qtype)); + kfree(qname); + return 0; + } + sbi->s_qf_names[qtype] = qname; + if (strchr(sbi->s_qf_names[qtype], '/')) { + printk(KERN_ERR + "EXT4-fs: quotafile must be on " + "filesystem root.\n"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + set_opt(sbi->s_mount_opt, QUOTA); + break; + case Opt_offusrjquota: + qtype = USRQUOTA; + goto clear_qf_name; + case Opt_offgrpjquota: + qtype = GRPQUOTA; +clear_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR "EXT4-fs: Cannot change " + "journalled quota options when " + "quota turned on.\n"); + return 0; + } + /* + * The space will be released later when all options + * are confirmed to be correct + */ + sbi->s_qf_names[qtype] = NULL; + break; + case Opt_jqfmt_vfsold: + sbi->s_jquota_fmt = QFMT_VFS_OLD; + break; + case Opt_jqfmt_vfsv0: + sbi->s_jquota_fmt = QFMT_VFS_V0; + break; + case Opt_quota: + case Opt_usrquota: + set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, GRPQUOTA); + break; + case Opt_noquota: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR "EXT4-fs: Cannot change quota " + "options when quota turned on.\n"); + return 0; + } + clear_opt(sbi->s_mount_opt, QUOTA); + clear_opt(sbi->s_mount_opt, USRQUOTA); + clear_opt(sbi->s_mount_opt, GRPQUOTA); + break; +#else + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_offusrjquota: + case Opt_offgrpjquota: + case Opt_jqfmt_vfsold: + case Opt_jqfmt_vfsv0: + printk(KERN_ERR + "EXT4-fs: journalled quota options not " + "supported.\n"); + break; + case Opt_noquota: + break; +#endif + case Opt_abort: + set_opt(sbi->s_mount_opt, ABORT); + break; + case Opt_barrier: + if (match_int(&args[0], &option)) + return 0; + if (option) + set_opt(sbi->s_mount_opt, BARRIER); + else + clear_opt(sbi->s_mount_opt, BARRIER); + break; + case Opt_ignore: + break; + case Opt_resize: + if (!is_remount) { + printk("EXT4-fs: resize option only available " + "for remount\n"); + return 0; + } + if (match_int(&args[0], &option) != 0) + return 0; + *n_blocks_count = option; + break; + case Opt_nobh: + set_opt(sbi->s_mount_opt, NOBH); + break; + case Opt_bh: + clear_opt(sbi->s_mount_opt, NOBH); + break; + case Opt_extents: + set_opt (sbi->s_mount_opt, EXTENTS); + break; + default: + printk (KERN_ERR + "EXT4-fs: Unrecognized mount option \"%s\" " + "or missing value\n", p); + return 0; + } + } +#ifdef CONFIG_QUOTA + if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && + sbi->s_qf_names[USRQUOTA]) + clear_opt(sbi->s_mount_opt, USRQUOTA); + + if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && + sbi->s_qf_names[GRPQUOTA]) + clear_opt(sbi->s_mount_opt, GRPQUOTA); + + if ((sbi->s_qf_names[USRQUOTA] && + (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || + (sbi->s_qf_names[GRPQUOTA] && + (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { + printk(KERN_ERR "EXT4-fs: old and new quota " + "format mixing.\n"); + return 0; + } + + if (!sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT4-fs: journalled quota format " + "not specified.\n"); + return 0; + } + } else { + if (sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT4-fs: journalled quota format " + "specified with no journalling " + "enabled.\n"); + return 0; + } + } +#endif + return 1; +} + +static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, + int read_only) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + int res = 0; + + if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { + printk (KERN_ERR "EXT4-fs warning: revision level too high, " + "forcing read-only mode\n"); + res = MS_RDONLY; + } + if (read_only) + return res; + if (!(sbi->s_mount_state & EXT4_VALID_FS)) + printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " + "running e2fsck is recommended\n"); + else if ((sbi->s_mount_state & EXT4_ERROR_FS)) + printk (KERN_WARNING + "EXT4-fs warning: mounting fs with errors, " + "running e2fsck is recommended\n"); + else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && + le16_to_cpu(es->s_mnt_count) >= + (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) + printk (KERN_WARNING + "EXT4-fs warning: maximal mount count reached, " + "running e2fsck is recommended\n"); + else if (le32_to_cpu(es->s_checkinterval) && + (le32_to_cpu(es->s_lastcheck) + + le32_to_cpu(es->s_checkinterval) <= get_seconds())) + printk (KERN_WARNING + "EXT4-fs warning: checktime reached, " + "running e2fsck is recommended\n"); +#if 0 + /* @@@ We _will_ want to clear the valid bit if we find + * inconsistencies, to force a fsck at reboot. But for + * a plain journaled filesystem we can keep it set as + * valid forever! :) + */ + es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); +#endif + if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) + es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); + es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + es->s_mtime = cpu_to_le32(get_seconds()); + ext4_update_dynamic_rev(sb); + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + + ext4_commit_super(sb, es, 1); + if (test_opt(sb, DEBUG)) + printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " + "bpg=%lu, ipg=%lu, mo=%04lx]\n", + sb->s_blocksize, + sbi->s_groups_count, + EXT4_BLOCKS_PER_GROUP(sb), + EXT4_INODES_PER_GROUP(sb), + sbi->s_mount_opt); + + printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); + if (EXT4_SB(sb)->s_journal->j_inode == NULL) { + char b[BDEVNAME_SIZE]; + + printk("external journal on %s\n", + bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); + } else { + printk("internal journal\n"); + } + return res; +} + +/* Called at mount-time, super-block is locked */ +static int ext4_check_descriptors (struct super_block * sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); + ext4_fsblk_t last_block; + ext4_fsblk_t block_bitmap; + ext4_fsblk_t inode_bitmap; + ext4_fsblk_t inode_table; + struct ext4_group_desc * gdp = NULL; + int desc_block = 0; + int i; + + ext4_debug ("Checking group descriptors"); + + for (i = 0; i < sbi->s_groups_count; i++) + { + if (i == sbi->s_groups_count - 1) + last_block = ext4_blocks_count(sbi->s_es) - 1; + else + last_block = first_block + + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + + if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext4_group_desc *) + sbi->s_group_desc[desc_block++]->b_data; + block_bitmap = ext4_block_bitmap(sb, gdp); + if (block_bitmap < first_block || block_bitmap > last_block) + { + ext4_error (sb, "ext4_check_descriptors", + "Block bitmap for group %d" + " not in group (block %llu)!", + i, block_bitmap); + return 0; + } + inode_bitmap = ext4_inode_bitmap(sb, gdp); + if (inode_bitmap < first_block || inode_bitmap > last_block) + { + ext4_error (sb, "ext4_check_descriptors", + "Inode bitmap for group %d" + " not in group (block %llu)!", + i, inode_bitmap); + return 0; + } + inode_table = ext4_inode_table(sb, gdp); + if (inode_table < first_block || + inode_table + sbi->s_itb_per_group > last_block) + { + ext4_error (sb, "ext4_check_descriptors", + "Inode table for group %d" + " not in group (block %llu)!", + i, inode_table); + return 0; + } + first_block += EXT4_BLOCKS_PER_GROUP(sb); + gdp = (struct ext4_group_desc *) + ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); + } + + ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); + sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); + return 1; +} + + +/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at + * the superblock) which were deleted from all directories, but held open by + * a process at the time of a crash. We walk the list and try to delete these + * inodes at recovery time (only with a read-write filesystem). + * + * In order to keep the orphan inode chain consistent during traversal (in + * case of crash during recovery), we link each inode into the superblock + * orphan list_head and handle it the same way as an inode deletion during + * normal operation (which journals the operations for us). + * + * We only do an iget() and an iput() on each inode, which is very safe if we + * accidentally point at an in-use or already deleted inode. The worst that + * can happen in this case is that we get a "bit already cleared" message from + * ext4_free_inode(). The only reason we would point at a wrong inode is if + * e2fsck was run on this filesystem, and it must have already done the orphan + * inode cleanup for us, so we can safely abort without any further action. + */ +static void ext4_orphan_cleanup (struct super_block * sb, + struct ext4_super_block * es) +{ + unsigned int s_flags = sb->s_flags; + int nr_orphans = 0, nr_truncates = 0; +#ifdef CONFIG_QUOTA + int i; +#endif + if (!es->s_last_orphan) { + jbd_debug(4, "no orphan inodes to clean up\n"); + return; + } + + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { + if (es->s_last_orphan) + jbd_debug(1, "Errors on filesystem, " + "clearing orphan list.\n"); + es->s_last_orphan = 0; + jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); + return; + } + + if (s_flags & MS_RDONLY) { + printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", + sb->s_id); + sb->s_flags &= ~MS_RDONLY; + } +#ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + for (i = 0; i < MAXQUOTAS; i++) { + if (EXT4_SB(sb)->s_qf_names[i]) { + int ret = ext4_quota_on_mount(sb, i); + if (ret < 0) + printk(KERN_ERR + "EXT4-fs: Cannot turn on journalled " + "quota: error %d\n", ret); + } + } +#endif + + while (es->s_last_orphan) { + struct inode *inode; + + if (!(inode = + ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { + es->s_last_orphan = 0; + break; + } + + list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); + DQUOT_INIT(inode); + if (inode->i_nlink) { + printk(KERN_DEBUG + "%s: truncating inode %lu to %Ld bytes\n", + __FUNCTION__, inode->i_ino, inode->i_size); + jbd_debug(2, "truncating inode %lu to %Ld bytes\n", + inode->i_ino, inode->i_size); + ext4_truncate(inode); + nr_truncates++; + } else { + printk(KERN_DEBUG + "%s: deleting unreferenced inode %lu\n", + __FUNCTION__, inode->i_ino); + jbd_debug(2, "deleting unreferenced inode %lu\n", + inode->i_ino); + nr_orphans++; + } + iput(inode); /* The delete magic happens here! */ + } + +#define PLURAL(x) (x), ((x)==1) ? "" : "s" + + if (nr_orphans) + printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", + sb->s_id, PLURAL(nr_orphans)); + if (nr_truncates) + printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", + sb->s_id, PLURAL(nr_truncates)); +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + for (i = 0; i < MAXQUOTAS; i++) { + if (sb_dqopt(sb)->files[i]) + vfs_quota_off(sb, i); + } +#endif + sb->s_flags = s_flags; /* Restore MS_RDONLY status */ +} + +#define log2(n) ffz(~(n)) + +/* + * Maximal file size. There is a direct, and {,double-,triple-}indirect + * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. + * We need to be 1 filesystem block less than the 2^32 sector limit. + */ +static loff_t ext4_max_size(int bits) +{ + loff_t res = EXT4_NDIR_BLOCKS; + /* This constant is calculated to be the largest file size for a + * dense, 4k-blocksize file such that the total number of + * sectors in the file, including data and all indirect blocks, + * does not exceed 2^32. */ + const loff_t upper_limit = 0x1ff7fffd000LL; + + res += 1LL << (bits-2); + res += 1LL << (2*(bits-2)); + res += 1LL << (3*(bits-2)); + res <<= bits; + if (res > upper_limit) + res = upper_limit; + return res; +} + +static ext4_fsblk_t descriptor_loc(struct super_block *sb, + ext4_fsblk_t logical_sb_block, int nr) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned long bg, first_meta_bg; + int has_super = 0; + + first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); + + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || + nr < first_meta_bg) + return logical_sb_block + nr + 1; + bg = sbi->s_desc_per_block * nr; + if (ext4_bg_has_super(sb, bg)) + has_super = 1; + return (has_super + ext4_group_first_block_no(sb, bg)); +} + + +static int ext4_fill_super (struct super_block *sb, void *data, int silent) +{ + struct buffer_head * bh; + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi; + ext4_fsblk_t block; + ext4_fsblk_t sb_block = get_sb_block(&data); + ext4_fsblk_t logical_sb_block; + unsigned long offset = 0; + unsigned int journal_inum = 0; + unsigned long journal_devnum = 0; + unsigned long def_mount_opts; + struct inode *root; + int blocksize; + int hblock; + int db_count; + int i; + int needs_recovery; + __le32 features; + __u64 blocks_count; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + sb->s_fs_info = sbi; + sbi->s_mount_opt = 0; + sbi->s_resuid = EXT4_DEF_RESUID; + sbi->s_resgid = EXT4_DEF_RESGID; + + unlock_kernel(); + + blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); + if (!blocksize) { + printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); + goto out_fail; + } + + /* + * The ext4 superblock will not be buffer aligned for other than 1kB + * block sizes. We need to calculate the offset from buffer start. + */ + if (blocksize != EXT4_MIN_BLOCK_SIZE) { + logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + offset = do_div(logical_sb_block, blocksize); + } else { + logical_sb_block = sb_block; + } + + if (!(bh = sb_bread(sb, logical_sb_block))) { + printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); + goto out_fail; + } + /* + * Note: s_es must be initialized as soon as possible because + * some ext4 macro-instructions depend on its value + */ + es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); + sbi->s_es = es; + sb->s_magic = le16_to_cpu(es->s_magic); + if (sb->s_magic != EXT4_SUPER_MAGIC) + goto cantfind_ext4; + + /* Set defaults before we parse the mount options */ + def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + if (def_mount_opts & EXT4_DEFM_DEBUG) + set_opt(sbi->s_mount_opt, DEBUG); + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) + set_opt(sbi->s_mount_opt, GRPID); + if (def_mount_opts & EXT4_DEFM_UID16) + set_opt(sbi->s_mount_opt, NO_UID32); + if (def_mount_opts & EXT4_DEFM_XATTR_USER) + set_opt(sbi->s_mount_opt, XATTR_USER); + if (def_mount_opts & EXT4_DEFM_ACL) + set_opt(sbi->s_mount_opt, POSIX_ACL); + if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) + sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) + sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) + sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; + + if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) + set_opt(sbi->s_mount_opt, ERRORS_PANIC); + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) + set_opt(sbi->s_mount_opt, ERRORS_RO); + else + set_opt(sbi->s_mount_opt, ERRORS_CONT); + + sbi->s_resuid = le16_to_cpu(es->s_def_resuid); + sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + + set_opt(sbi->s_mount_opt, RESERVATION); + + if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, + NULL, 0)) + goto failed_mount; + + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && + (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || + EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || + EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) + printk(KERN_WARNING + "EXT4-fs warning: feature flags set on rev 0 fs, " + "running e2fsck is recommended\n"); + /* + * Check feature flags regardless of the revision level, since we + * previously didn't change the revision level when setting the flags, + * so there is a chance incompat flags are set on a rev 0 filesystem. + */ + features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); + if (features) { + printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " + "unsupported optional features (%x).\n", + sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); + if (!(sb->s_flags & MS_RDONLY) && features) { + printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " + "unsupported optional features (%x).\n", + sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + if (blocksize < EXT4_MIN_BLOCK_SIZE || + blocksize > EXT4_MAX_BLOCK_SIZE) { + printk(KERN_ERR + "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", + blocksize, sb->s_id); + goto failed_mount; + } + + hblock = bdev_hardsect_size(sb->s_bdev); + if (sb->s_blocksize != blocksize) { + /* + * Make sure the blocksize for the filesystem is larger + * than the hardware sectorsize for the machine. + */ + if (blocksize < hblock) { + printk(KERN_ERR "EXT4-fs: blocksize %d too small for " + "device blocksize %d.\n", blocksize, hblock); + goto failed_mount; + } + + brelse (bh); + sb_set_blocksize(sb, blocksize); + logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + offset = do_div(logical_sb_block, blocksize); + bh = sb_bread(sb, logical_sb_block); + if (!bh) { + printk(KERN_ERR + "EXT4-fs: Can't read superblock on 2nd try.\n"); + goto failed_mount; + } + es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); + sbi->s_es = es; + if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { + printk (KERN_ERR + "EXT4-fs: Magic mismatch, very weird !\n"); + goto failed_mount; + } + } + + sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); + + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { + sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; + sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; + } else { + sbi->s_inode_size = le16_to_cpu(es->s_inode_size); + sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || + (sbi->s_inode_size & (sbi->s_inode_size - 1)) || + (sbi->s_inode_size > blocksize)) { + printk (KERN_ERR + "EXT4-fs: unsupported inode size: %d\n", + sbi->s_inode_size); + goto failed_mount; + } + } + sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << + le32_to_cpu(es->s_log_frag_size); + if (blocksize != sbi->s_frag_size) { + printk(KERN_ERR + "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n", + sbi->s_frag_size, blocksize); + goto failed_mount; + } + sbi->s_desc_size = le16_to_cpu(es->s_desc_size); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { + if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || + sbi->s_desc_size > EXT4_MAX_DESC_SIZE || + sbi->s_desc_size & (sbi->s_desc_size - 1)) { + printk(KERN_ERR + "EXT4-fs: unsupported descriptor size %lu\n", + sbi->s_desc_size); + goto failed_mount; + } + } else + sbi->s_desc_size = EXT4_MIN_DESC_SIZE; + sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); + sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); + sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); + if (EXT4_INODE_SIZE(sb) == 0) + goto cantfind_ext4; + sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); + if (sbi->s_inodes_per_block == 0) + goto cantfind_ext4; + sbi->s_itb_per_group = sbi->s_inodes_per_group / + sbi->s_inodes_per_block; + sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); + sbi->s_sbh = bh; + sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb)); + sbi->s_desc_per_block_bits = log2(EXT4_DESC_PER_BLOCK(sb)); + for (i=0; i < 4; i++) + sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); + sbi->s_def_hash_version = es->s_def_hash_version; + + if (sbi->s_blocks_per_group > blocksize * 8) { + printk (KERN_ERR + "EXT4-fs: #blocks per group too big: %lu\n", + sbi->s_blocks_per_group); + goto failed_mount; + } + if (sbi->s_frags_per_group > blocksize * 8) { + printk (KERN_ERR + "EXT4-fs: #fragments per group too big: %lu\n", + sbi->s_frags_per_group); + goto failed_mount; + } + if (sbi->s_inodes_per_group > blocksize * 8) { + printk (KERN_ERR + "EXT4-fs: #inodes per group too big: %lu\n", + sbi->s_inodes_per_group); + goto failed_mount; + } + + if (ext4_blocks_count(es) > + (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + printk(KERN_ERR "EXT4-fs: filesystem on %s:" + " too large to mount safely\n", sb->s_id); + if (sizeof(sector_t) < 8) + printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " + "enabled\n"); + goto failed_mount; + } + + if (EXT4_BLOCKS_PER_GROUP(sb) == 0) + goto cantfind_ext4; + blocks_count = (ext4_blocks_count(es) - + le32_to_cpu(es->s_first_data_block) + + EXT4_BLOCKS_PER_GROUP(sb) - 1); + do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); + sbi->s_groups_count = blocks_count; + db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / + EXT4_DESC_PER_BLOCK(sb); + sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), + GFP_KERNEL); + if (sbi->s_group_desc == NULL) { + printk (KERN_ERR "EXT4-fs: not enough memory\n"); + goto failed_mount; + } + + bgl_lock_init(&sbi->s_blockgroup_lock); + + for (i = 0; i < db_count; i++) { + block = descriptor_loc(sb, logical_sb_block, i); + sbi->s_group_desc[i] = sb_bread(sb, block); + if (!sbi->s_group_desc[i]) { + printk (KERN_ERR "EXT4-fs: " + "can't read group descriptor %d\n", i); + db_count = i; + goto failed_mount2; + } + } + if (!ext4_check_descriptors (sb)) { + printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); + goto failed_mount2; + } + sbi->s_gdb_count = db_count; + get_random_bytes(&sbi->s_next_generation, sizeof(u32)); + spin_lock_init(&sbi->s_next_gen_lock); + + percpu_counter_init(&sbi->s_freeblocks_counter, + ext4_count_free_blocks(sb)); + percpu_counter_init(&sbi->s_freeinodes_counter, + ext4_count_free_inodes(sb)); + percpu_counter_init(&sbi->s_dirs_counter, + ext4_count_dirs(sb)); + + /* per fileystem reservation list head & lock */ + spin_lock_init(&sbi->s_rsv_window_lock); + sbi->s_rsv_window_root = RB_ROOT; + /* Add a single, static dummy reservation to the start of the + * reservation window list --- it gives us a placeholder for + * append-at-start-of-list which makes the allocation logic + * _much_ simpler. */ + sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_alloc_hit = 0; + sbi->s_rsv_window_head.rsv_goal_size = 0; + ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); + + /* + * set up enough so that it can read an inode + */ + sb->s_op = &ext4_sops; + sb->s_export_op = &ext4_export_ops; + sb->s_xattr = ext4_xattr_handlers; +#ifdef CONFIG_QUOTA + sb->s_qcop = &ext4_qctl_operations; + sb->dq_op = &ext4_quota_operations; +#endif + INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + + sb->s_root = NULL; + + needs_recovery = (es->s_last_orphan != 0 || + EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_RECOVER)); + + /* + * The first inode we look at is the journal inode. Don't try + * root first: it may be modified in the journal! + */ + if (!test_opt(sb, NOLOAD) && + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (ext4_load_journal(sb, es, journal_devnum)) + goto failed_mount3; + } else if (journal_inum) { + if (ext4_create_journal(sb, es, journal_inum)) + goto failed_mount3; + } else { + if (!silent) + printk (KERN_ERR + "ext4: No journal on filesystem on %s\n", + sb->s_id); + goto failed_mount3; + } + + /* We have now updated the journal if required, so we can + * validate the data journaling mode. */ + switch (test_opt(sb, DATA_FLAGS)) { + case 0: + /* No mode set, assume a default based on the journal + * capabilities: ORDERED_DATA if the journal can + * cope, else JOURNAL_DATA + */ + if (jbd2_journal_check_available_features + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) + set_opt(sbi->s_mount_opt, ORDERED_DATA); + else + set_opt(sbi->s_mount_opt, JOURNAL_DATA); + break; + + case EXT4_MOUNT_ORDERED_DATA: + case EXT4_MOUNT_WRITEBACK_DATA: + if (!jbd2_journal_check_available_features + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { + printk(KERN_ERR "EXT4-fs: Journal does not support " + "requested data journaling mode\n"); + goto failed_mount4; + } + default: + break; + } + + if (test_opt(sb, NOBH)) { + if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { + printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " + "its supported only with writeback mode\n"); + clear_opt(sbi->s_mount_opt, NOBH); + } + } + /* + * The jbd2_journal_load will have done any necessary log recovery, + * so we can safely mount the rest of the filesystem now. + */ + + root = iget(sb, EXT4_ROOT_INO); + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + printk(KERN_ERR "EXT4-fs: get root inode failed\n"); + iput(root); + goto failed_mount4; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + dput(sb->s_root); + sb->s_root = NULL; + printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); + goto failed_mount4; + } + + ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); + /* + * akpm: core read_super() calls in here with the superblock locked. + * That deadlocks, because orphan cleanup needs to lock the superblock + * in numerous places. Here we just pop the lock - it's relatively + * harmless, because we are now ready to accept write_super() requests, + * and aviro says that's the only reason for hanging onto the + * superblock lock. + */ + EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; + ext4_orphan_cleanup(sb, es); + EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; + if (needs_recovery) + printk (KERN_INFO "EXT4-fs: recovery complete.\n"); + ext4_mark_recovery_complete(sb, es); + printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", + test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": + test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": + "writeback"); + + ext4_ext_init(sb); + + lock_kernel(); + return 0; + +cantfind_ext4: + if (!silent) + printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", + sb->s_id); + goto failed_mount; + +failed_mount4: + jbd2_journal_destroy(sbi->s_journal); +failed_mount3: + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); +failed_mount2: + for (i = 0; i < db_count; i++) + brelse(sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); +failed_mount: +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif + ext4_blkdev_remove(sbi); + brelse(bh); +out_fail: + sb->s_fs_info = NULL; + kfree(sbi); + lock_kernel(); + return -EINVAL; +} + +/* + * Setup any per-fs journal parameters now. We'll do this both on + * initial mount, once the journal has been initialised but before we've + * done any recovery; and again on any subsequent remount. + */ +static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (sbi->s_commit_interval) + journal->j_commit_interval = sbi->s_commit_interval; + /* We could also set up an ext4-specific default for the commit + * interval here, but for now we'll just fall back to the jbd + * default. */ + + spin_lock(&journal->j_state_lock); + if (test_opt(sb, BARRIER)) + journal->j_flags |= JBD2_BARRIER; + else + journal->j_flags &= ~JBD2_BARRIER; + spin_unlock(&journal->j_state_lock); +} + +static journal_t *ext4_get_journal(struct super_block *sb, + unsigned int journal_inum) +{ + struct inode *journal_inode; + journal_t *journal; + + /* First, test for the existence of a valid inode on disk. Bad + * things happen if we iget() an unused inode, as the subsequent + * iput() will try to delete it. */ + + journal_inode = iget(sb, journal_inum); + if (!journal_inode) { + printk(KERN_ERR "EXT4-fs: no journal found.\n"); + return NULL; + } + if (!journal_inode->i_nlink) { + make_bad_inode(journal_inode); + iput(journal_inode); + printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); + return NULL; + } + + jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", + journal_inode, journal_inode->i_size); + if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { + printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); + iput(journal_inode); + return NULL; + } + + journal = jbd2_journal_init_inode(journal_inode); + if (!journal) { + printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); + iput(journal_inode); + return NULL; + } + journal->j_private = sb; + ext4_init_journal_params(sb, journal); + return journal; +} + +static journal_t *ext4_get_dev_journal(struct super_block *sb, + dev_t j_dev) +{ + struct buffer_head * bh; + journal_t *journal; + ext4_fsblk_t start; + ext4_fsblk_t len; + int hblock, blocksize; + ext4_fsblk_t sb_block; + unsigned long offset; + struct ext4_super_block * es; + struct block_device *bdev; + + bdev = ext4_blkdev_get(j_dev); + if (bdev == NULL) + return NULL; + + if (bd_claim(bdev, sb)) { + printk(KERN_ERR + "EXT4: failed to claim external journal device.\n"); + blkdev_put(bdev); + return NULL; + } + + blocksize = sb->s_blocksize; + hblock = bdev_hardsect_size(bdev); + if (blocksize < hblock) { + printk(KERN_ERR + "EXT4-fs: blocksize too small for journal device.\n"); + goto out_bdev; + } + + sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; + offset = EXT4_MIN_BLOCK_SIZE % blocksize; + set_blocksize(bdev, blocksize); + if (!(bh = __bread(bdev, sb_block, blocksize))) { + printk(KERN_ERR "EXT4-fs: couldn't read superblock of " + "external journal\n"); + goto out_bdev; + } + + es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); + if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || + !(le32_to_cpu(es->s_feature_incompat) & + EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { + printk(KERN_ERR "EXT4-fs: external journal has " + "bad superblock\n"); + brelse(bh); + goto out_bdev; + } + + if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { + printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); + brelse(bh); + goto out_bdev; + } + + len = ext4_blocks_count(es); + start = sb_block + 1; + brelse(bh); /* we're done with the superblock */ + + journal = jbd2_journal_init_dev(bdev, sb->s_bdev, + start, len, blocksize); + if (!journal) { + printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); + goto out_bdev; + } + journal->j_private = sb; + ll_rw_block(READ, 1, &journal->j_sb_buffer); + wait_on_buffer(journal->j_sb_buffer); + if (!buffer_uptodate(journal->j_sb_buffer)) { + printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); + goto out_journal; + } + if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { + printk(KERN_ERR "EXT4-fs: External journal has more than one " + "user (unsupported) - %d\n", + be32_to_cpu(journal->j_superblock->s_nr_users)); + goto out_journal; + } + EXT4_SB(sb)->journal_bdev = bdev; + ext4_init_journal_params(sb, journal); + return journal; +out_journal: + jbd2_journal_destroy(journal); +out_bdev: + ext4_blkdev_put(bdev); + return NULL; +} + +static int ext4_load_journal(struct super_block *sb, + struct ext4_super_block *es, + unsigned long journal_devnum) +{ + journal_t *journal; + unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); + dev_t journal_dev; + int err = 0; + int really_read_only; + + if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + printk(KERN_INFO "EXT4-fs: external journal device major/minor " + "numbers have changed\n"); + journal_dev = new_decode_dev(journal_devnum); + } else + journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); + + really_read_only = bdev_read_only(sb->s_bdev); + + /* + * Are we loading a blank journal or performing recovery after a + * crash? For recovery, we need to check in advance whether we + * can get read-write access to the device. + */ + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + if (sb->s_flags & MS_RDONLY) { + printk(KERN_INFO "EXT4-fs: INFO: recovery " + "required on readonly filesystem.\n"); + if (really_read_only) { + printk(KERN_ERR "EXT4-fs: write access " + "unavailable, cannot proceed.\n"); + return -EROFS; + } + printk (KERN_INFO "EXT4-fs: write access will " + "be enabled during recovery.\n"); + } + } + + if (journal_inum && journal_dev) { + printk(KERN_ERR "EXT4-fs: filesystem has both journal " + "and inode journals!\n"); + return -EINVAL; + } + + if (journal_inum) { + if (!(journal = ext4_get_journal(sb, journal_inum))) + return -EINVAL; + } else { + if (!(journal = ext4_get_dev_journal(sb, journal_dev))) + return -EINVAL; + } + + if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { + err = jbd2_journal_update_format(journal); + if (err) { + printk(KERN_ERR "EXT4-fs: error updating journal.\n"); + jbd2_journal_destroy(journal); + return err; + } + } + + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) + err = jbd2_journal_wipe(journal, !really_read_only); + if (!err) + err = jbd2_journal_load(journal); + + if (err) { + printk(KERN_ERR "EXT4-fs: error loading journal.\n"); + jbd2_journal_destroy(journal); + return err; + } + + EXT4_SB(sb)->s_journal = journal; + ext4_clear_journal_err(sb, es); + + if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + es->s_journal_dev = cpu_to_le32(journal_devnum); + sb->s_dirt = 1; + + /* Make sure we flush the recovery flag to disk. */ + ext4_commit_super(sb, es, 1); + } + + return 0; +} + +static int ext4_create_journal(struct super_block * sb, + struct ext4_super_block * es, + unsigned int journal_inum) +{ + journal_t *journal; + + if (sb->s_flags & MS_RDONLY) { + printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " + "create journal.\n"); + return -EROFS; + } + + if (!(journal = ext4_get_journal(sb, journal_inum))) + return -EINVAL; + + printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", + journal_inum); + + if (jbd2_journal_create(journal)) { + printk(KERN_ERR "EXT4-fs: error creating journal.\n"); + jbd2_journal_destroy(journal); + return -EIO; + } + + EXT4_SB(sb)->s_journal = journal; + + ext4_update_dynamic_rev(sb); + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); + + es->s_journal_inum = cpu_to_le32(journal_inum); + sb->s_dirt = 1; + + /* Make sure we flush the recovery flag to disk. */ + ext4_commit_super(sb, es, 1); + + return 0; +} + +static void ext4_commit_super (struct super_block * sb, + struct ext4_super_block * es, + int sync) +{ + struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + + if (!sbh) + return; + es->s_wtime = cpu_to_le32(get_seconds()); + ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); + es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); + BUFFER_TRACE(sbh, "marking dirty"); + mark_buffer_dirty(sbh); + if (sync) + sync_dirty_buffer(sbh); +} + + +/* + * Have we just finished recovery? If so, and if we are mounting (or + * remounting) the filesystem readonly, then we will end up with a + * consistent fs on disk. Record that fact. + */ +static void ext4_mark_recovery_complete(struct super_block * sb, + struct ext4_super_block * es) +{ + journal_t *journal = EXT4_SB(sb)->s_journal; + + jbd2_journal_lock_updates(journal); + jbd2_journal_flush(journal); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && + sb->s_flags & MS_RDONLY) { + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + sb->s_dirt = 0; + ext4_commit_super(sb, es, 1); + } + jbd2_journal_unlock_updates(journal); +} + +/* + * If we are mounting (or read-write remounting) a filesystem whose journal + * has recorded an error from a previous lifetime, move that error to the + * main filesystem now. + */ +static void ext4_clear_journal_err(struct super_block * sb, + struct ext4_super_block * es) +{ + journal_t *journal; + int j_errno; + const char *errstr; + + journal = EXT4_SB(sb)->s_journal; + + /* + * Now check for any error status which may have been recorded in the + * journal by a prior ext4_error() or ext4_abort() + */ + + j_errno = jbd2_journal_errno(journal); + if (j_errno) { + char nbuf[16]; + + errstr = ext4_decode_error(sb, j_errno, nbuf); + ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " + "from previous mount: %s", errstr); + ext4_warning(sb, __FUNCTION__, "Marking fs in need of " + "filesystem check."); + + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_commit_super (sb, es, 1); + + jbd2_journal_clear_err(journal); + } +} + +/* + * Force the running and committing transactions to commit, + * and wait on the commit. + */ +int ext4_force_commit(struct super_block *sb) +{ + journal_t *journal; + int ret; + + if (sb->s_flags & MS_RDONLY) + return 0; + + journal = EXT4_SB(sb)->s_journal; + sb->s_dirt = 0; + ret = ext4_journal_force_commit(journal); + return ret; +} + +/* + * Ext4 always journals updates to the superblock itself, so we don't + * have to propagate any other updates to the superblock on disk at this + * point. Just start an async writeback to get the buffers on their way + * to the disk. + * + * This implicitly triggers the writebehind on sync(). + */ + +static void ext4_write_super (struct super_block * sb) +{ + if (mutex_trylock(&sb->s_lock) != 0) + BUG(); + sb->s_dirt = 0; +} + +static int ext4_sync_fs(struct super_block *sb, int wait) +{ + tid_t target; + + sb->s_dirt = 0; + if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { + if (wait) + jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); + } + return 0; +} + +/* + * LVM calls this function before a (read-only) snapshot is created. This + * gives us a chance to flush the journal completely and mark the fs clean. + */ +static void ext4_write_super_lockfs(struct super_block *sb) +{ + sb->s_dirt = 0; + + if (!(sb->s_flags & MS_RDONLY)) { + journal_t *journal = EXT4_SB(sb)->s_journal; + + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); + jbd2_journal_flush(journal); + + /* Journal blocked and flushed, clear needs_recovery flag. */ + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + } +} + +/* + * Called by LVM after the snapshot is done. We need to reset the RECOVER + * flag here, even though the filesystem is not technically dirty yet. + */ +static void ext4_unlockfs(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) { + lock_super(sb); + /* Reser the needs_recovery flag before the fs is unlocked. */ + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + unlock_super(sb); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } +} + +static int ext4_remount (struct super_block * sb, int * flags, char * data) +{ + struct ext4_super_block * es; + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t n_blocks_count = 0; + unsigned long old_sb_flags; + struct ext4_mount_options old_opts; + int err; +#ifdef CONFIG_QUOTA + int i; +#endif + + /* Store the original options */ + old_sb_flags = sb->s_flags; + old_opts.s_mount_opt = sbi->s_mount_opt; + old_opts.s_resuid = sbi->s_resuid; + old_opts.s_resgid = sbi->s_resgid; + old_opts.s_commit_interval = sbi->s_commit_interval; +#ifdef CONFIG_QUOTA + old_opts.s_jquota_fmt = sbi->s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) + old_opts.s_qf_names[i] = sbi->s_qf_names[i]; +#endif + + /* + * Allow the "check" option to be passed as a remount option. + */ + if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { + err = -EINVAL; + goto restore_opts; + } + + if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) + ext4_abort(sb, __FUNCTION__, "Abort forced by user"); + + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + + es = sbi->s_es; + + ext4_init_journal_params(sb, sbi->s_journal); + + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || + n_blocks_count > ext4_blocks_count(es)) { + if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { + err = -EROFS; + goto restore_opts; + } + + if (*flags & MS_RDONLY) { + /* + * First of all, the unconditional stuff we have to do + * to disable replay of the journal when we next remount + */ + sb->s_flags |= MS_RDONLY; + + /* + * OK, test if we are remounting a valid rw partition + * readonly, and if so set the rdonly flag and then + * mark the partition as valid again. + */ + if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && + (sbi->s_mount_state & EXT4_VALID_FS)) + es->s_state = cpu_to_le16(sbi->s_mount_state); + + ext4_mark_recovery_complete(sb, es); + } else { + __le32 ret; + if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, + ~EXT4_FEATURE_RO_COMPAT_SUPP))) { + printk(KERN_WARNING "EXT4-fs: %s: couldn't " + "remount RDWR because of unsupported " + "optional features (%x).\n", + sb->s_id, le32_to_cpu(ret)); + err = -EROFS; + goto restore_opts; + } + /* + * Mounting a RDONLY partition read-write, so reread + * and store the current valid flag. (It may have + * been changed by e2fsck since we originally mounted + * the partition.) + */ + ext4_clear_journal_err(sb, es); + sbi->s_mount_state = le16_to_cpu(es->s_state); + if ((err = ext4_group_extend(sb, es, n_blocks_count))) + goto restore_opts; + if (!ext4_setup_super (sb, es, 0)) + sb->s_flags &= ~MS_RDONLY; + } + } +#ifdef CONFIG_QUOTA + /* Release old quota file names */ + for (i = 0; i < MAXQUOTAS; i++) + if (old_opts.s_qf_names[i] && + old_opts.s_qf_names[i] != sbi->s_qf_names[i]) + kfree(old_opts.s_qf_names[i]); +#endif + return 0; +restore_opts: + sb->s_flags = old_sb_flags; + sbi->s_mount_opt = old_opts.s_mount_opt; + sbi->s_resuid = old_opts.s_resuid; + sbi->s_resgid = old_opts.s_resgid; + sbi->s_commit_interval = old_opts.s_commit_interval; +#ifdef CONFIG_QUOTA + sbi->s_jquota_fmt = old_opts.s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i] && + old_opts.s_qf_names[i] != sbi->s_qf_names[i]) + kfree(sbi->s_qf_names[i]); + sbi->s_qf_names[i] = old_opts.s_qf_names[i]; + } +#endif + return err; +} + +static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) +{ + struct super_block *sb = dentry->d_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + ext4_fsblk_t overhead; + int i; + + if (test_opt (sb, MINIX_DF)) + overhead = 0; + else { + unsigned long ngroups; + ngroups = EXT4_SB(sb)->s_groups_count; + smp_rmb(); + + /* + * Compute the overhead (FS structures) + */ + + /* + * All of the blocks before first_data_block are + * overhead + */ + overhead = le32_to_cpu(es->s_first_data_block); + + /* + * Add the overhead attributed to the superblock and + * block group descriptors. If the sparse superblocks + * feature is turned on, then not all groups have this. + */ + for (i = 0; i < ngroups; i++) { + overhead += ext4_bg_has_super(sb, i) + + ext4_bg_num_gdb(sb, i); + cond_resched(); + } + + /* + * Every block group has an inode bitmap, a block + * bitmap, and an inode table. + */ + overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); + } + + buf->f_type = EXT4_SUPER_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = ext4_blocks_count(es) - overhead; + buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); + buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); + if (buf->f_bfree < ext4_r_blocks_count(es)) + buf->f_bavail = 0; + buf->f_files = le32_to_cpu(es->s_inodes_count); + buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); + buf->f_namelen = EXT4_NAME_LEN; + return 0; +} + +/* Helper function for writing quotas on sync - we need to start transaction before quota file + * is locked for write. Otherwise the are possible deadlocks: + * Process 1 Process 2 + * ext4_create() quota_sync() + * jbd2_journal_start() write_dquot() + * DQUOT_INIT() down(dqio_mutex) + * down(dqio_mutex) jbd2_journal_start() + * + */ + +#ifdef CONFIG_QUOTA + +static inline struct inode *dquot_to_inode(struct dquot *dquot) +{ + return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; +} + +static int ext4_dquot_initialize(struct inode *inode, int type) +{ + handle_t *handle; + int ret, err; + + /* We may create quota structure so we need to reserve enough blocks */ + handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_initialize(inode, type); + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext4_dquot_drop(struct inode *inode) +{ + handle_t *handle; + int ret, err; + + /* We may delete quota structure so we need to reserve enough blocks */ + handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_drop(inode); + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext4_write_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + struct inode *inode; + + inode = dquot_to_inode(dquot); + handle = ext4_journal_start(inode, + EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_commit(dquot); + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext4_acquire_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + + handle = ext4_journal_start(dquot_to_inode(dquot), + EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_acquire(dquot); + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext4_release_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + + handle = ext4_journal_start(dquot_to_inode(dquot), + EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_release(dquot); + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext4_mark_dquot_dirty(struct dquot *dquot) +{ + /* Are we journalling quotas? */ + if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || + EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + dquot_mark_dquot_dirty(dquot); + return ext4_write_dquot(dquot); + } else { + return dquot_mark_dquot_dirty(dquot); + } +} + +static int ext4_write_info(struct super_block *sb, int type) +{ + int ret, err; + handle_t *handle; + + /* Data block + inode block */ + handle = ext4_journal_start(sb->s_root->d_inode, 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_commit_info(sb, type); + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +/* + * Turn on quotas during mount time - we need to find + * the quota file and such... + */ +static int ext4_quota_on_mount(struct super_block *sb, int type) +{ + return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], + EXT4_SB(sb)->s_jquota_fmt, type); +} + +/* + * Standard function to be called on quota_on + */ +static int ext4_quota_on(struct super_block *sb, int type, int format_id, + char *path) +{ + int err; + struct nameidata nd; + + if (!test_opt(sb, QUOTA)) + return -EINVAL; + /* Not journalling quota? */ + if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] && + !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) + return vfs_quota_on(sb, type, format_id, path); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (err) + return err; + /* Quotafile not on the same filesystem? */ + if (nd.mnt->mnt_sb != sb) { + path_release(&nd); + return -EXDEV; + } + /* Quotafile not of fs root? */ + if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT4-fs: Quota file not on filesystem root. " + "Journalled quota will not work.\n"); + path_release(&nd); + return vfs_quota_on(sb, type, format_id, path); +} + +/* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... As quota files are never truncated and quota code + * itself serializes the operations (and noone else should touch the files) + * we don't have to be afraid of races */ +static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + struct buffer_head *bh; + loff_t i_size = i_size_read(inode); + + if (off > i_size) + return 0; + if (off+len > i_size) + len = i_size-off; + toread = len; + while (toread > 0) { + tocopy = sb->s_blocksize - offset < toread ? + sb->s_blocksize - offset : toread; + bh = ext4_bread(NULL, inode, blk, 0, &err); + if (err) + return err; + if (!bh) /* A hole? */ + memset(data, 0, tocopy); + else + memcpy(data, bh->b_data+offset, tocopy); + brelse(bh); + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile (we know the transaction is already started and has + * enough credits) */ +static ssize_t ext4_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; + size_t towrite = len; + struct buffer_head *bh; + handle_t *handle = journal_current_handle(); + + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); + while (towrite > 0) { + tocopy = sb->s_blocksize - offset < towrite ? + sb->s_blocksize - offset : towrite; + bh = ext4_bread(handle, inode, blk, 1, &err); + if (!bh) + goto out; + if (journal_quota) { + err = ext4_journal_get_write_access(handle, bh); + if (err) { + brelse(bh); + goto out; + } + } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, tocopy); + flush_dcache_page(bh->b_page); + unlock_buffer(bh); + if (journal_quota) + err = ext4_journal_dirty_metadata(handle, bh); + else { + /* Always do at least ordered writes for quotas */ + err = ext4_journal_dirty_data(handle, bh); + mark_buffer_dirty(bh); + } + brelse(bh); + if (err) + goto out; + offset = 0; + towrite -= tocopy; + data += tocopy; + blk++; + } +out: + if (len == towrite) + return err; + if (inode->i_size < off+len-towrite) { + i_size_write(inode, off+len-towrite); + EXT4_I(inode)->i_disksize = inode->i_size; + } + inode->i_version++; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + ext4_mark_inode_dirty(handle, inode); + mutex_unlock(&inode->i_mutex); + return len - towrite; +} + +#endif + +static int ext4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); +} + +static struct file_system_type ext4dev_fs_type = { + .owner = THIS_MODULE, + .name = "ext4dev", + .get_sb = ext4_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_ext4_fs(void) +{ + int err = init_ext4_xattr(); + if (err) + return err; + err = init_inodecache(); + if (err) + goto out1; + err = register_filesystem(&ext4dev_fs_type); + if (err) + goto out; + return 0; +out: + destroy_inodecache(); +out1: + exit_ext4_xattr(); + return err; +} + +static void __exit exit_ext4_fs(void) +{ + unregister_filesystem(&ext4dev_fs_type); + destroy_inodecache(); + exit_ext4_xattr(); +} + +MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); +MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); +MODULE_LICENSE("GPL"); +module_init(init_ext4_fs) +module_exit(exit_ext4_fs) diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c new file mode 100644 index 000000000000..fcf527286d75 --- /dev/null +++ b/fs/ext4/symlink.c @@ -0,0 +1,54 @@ +/* + * linux/fs/ext4/symlink.c + * + * Only fast symlinks left here - the rest is done by generic code. AV, 1999 + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/symlink.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext4 symlink handling code + */ + +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/namei.h> +#include "xattr.h" + +static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); + nd_set_link(nd, (char*)ei->i_data); + return NULL; +} + +struct inode_operations ext4_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +#ifdef CONFIG_EXT4DEV_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext4_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +struct inode_operations ext4_fast_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = ext4_follow_link, +#ifdef CONFIG_EXT4DEV_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext4_listxattr, + .removexattr = generic_removexattr, +#endif +}; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c new file mode 100644 index 000000000000..63233cd946a7 --- /dev/null +++ b/fs/ext4/xattr.c @@ -0,0 +1,1317 @@ +/* + * linux/fs/ext4/xattr.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + * + * Fix by Harrison Xing <harrison@mountainviewdata.com>. + * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>. + * Extended attributes for symlinks and special files added per + * suggestion of Luka Renko <luka.renko@hermes.si>. + * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, + * Red Hat Inc. + * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz + * and Andreas Gruenbacher <agruen@suse.de>. + */ + +/* + * Extended attributes are stored directly in inodes (on file systems with + * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl + * field contains the block number if an inode uses an additional block. All + * attributes must fit in the inode and one additional block. Blocks that + * contain the identical set of attributes may be shared among several inodes. + * Identical blocks are detected by keeping a cache of blocks that have + * recently been accessed. + * + * The attributes in inodes and on blocks have a different header; the entries + * are stored in the same format: + * + * +------------------+ + * | header | + * | entry 1 | | + * | entry 2 | | growing downwards + * | entry 3 | v + * | four null bytes | + * | . . . | + * | value 1 | ^ + * | value 3 | | growing upwards + * | value 2 | | + * +------------------+ + * + * The header is followed by multiple entry descriptors. In disk blocks, the + * entry descriptors are kept sorted. In inodes, they are unsorted. The + * attribute values are aligned to the end of the block in no specific order. + * + * Locking strategy + * ---------------- + * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count can change. Multiple writers to the same block are synchronized + * by the buffer lock. + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/ext4_jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/mbcache.h> +#include <linux/quotaops.h> +#include <linux/rwsem.h> +#include "xattr.h" +#include "acl.h" + +#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) +#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) +#define BFIRST(bh) ENTRY(BHDR(bh)+1) +#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) + +#define IHDR(inode, raw_inode) \ + ((struct ext4_xattr_ibody_header *) \ + ((void *)raw_inode + \ + EXT4_GOOD_OLD_INODE_SIZE + \ + EXT4_I(inode)->i_extra_isize)) +#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) + +#ifdef EXT4_XATTR_DEBUG +# define ea_idebug(inode, f...) do { \ + printk(KERN_DEBUG "inode %s:%lu: ", \ + inode->i_sb->s_id, inode->i_ino); \ + printk(f); \ + printk("\n"); \ + } while (0) +# define ea_bdebug(bh, f...) do { \ + char b[BDEVNAME_SIZE]; \ + printk(KERN_DEBUG "block %s:%lu: ", \ + bdevname(bh->b_bdev, b), \ + (unsigned long) bh->b_blocknr); \ + printk(f); \ + printk("\n"); \ + } while (0) +#else +# define ea_idebug(f...) +# define ea_bdebug(f...) +#endif + +static void ext4_xattr_cache_insert(struct buffer_head *); +static struct buffer_head *ext4_xattr_cache_find(struct inode *, + struct ext4_xattr_header *, + struct mb_cache_entry **); +static void ext4_xattr_rehash(struct ext4_xattr_header *, + struct ext4_xattr_entry *); + +static struct mb_cache *ext4_xattr_cache; + +static struct xattr_handler *ext4_xattr_handler_map[] = { + [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, + [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, +#endif + [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, +#ifdef CONFIG_EXT4DEV_FS_SECURITY + [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, +#endif +}; + +struct xattr_handler *ext4_xattr_handlers[] = { + &ext4_xattr_user_handler, + &ext4_xattr_trusted_handler, +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + &ext4_xattr_acl_access_handler, + &ext4_xattr_acl_default_handler, +#endif +#ifdef CONFIG_EXT4DEV_FS_SECURITY + &ext4_xattr_security_handler, +#endif + NULL +}; + +static inline struct xattr_handler * +ext4_xattr_handler(int name_index) +{ + struct xattr_handler *handler = NULL; + + if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) + handler = ext4_xattr_handler_map[name_index]; + return handler; +} + +/* + * Inode operation listxattr() + * + * dentry->d_inode->i_mutex: don't care + */ +ssize_t +ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + return ext4_xattr_list(dentry->d_inode, buffer, size); +} + +static int +ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) +{ + while (!IS_LAST_ENTRY(entry)) { + struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); + if ((void *)next >= end) + return -EIO; + entry = next; + } + return 0; +} + +static inline int +ext4_xattr_check_block(struct buffer_head *bh) +{ + int error; + + if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || + BHDR(bh)->h_blocks != cpu_to_le32(1)) + return -EIO; + error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); + return error; +} + +static inline int +ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) +{ + size_t value_size = le32_to_cpu(entry->e_value_size); + + if (entry->e_value_block != 0 || value_size > size || + le16_to_cpu(entry->e_value_offs) + value_size > size) + return -EIO; + return 0; +} + +static int +ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, + const char *name, size_t size, int sorted) +{ + struct ext4_xattr_entry *entry; + size_t name_len; + int cmp = 1; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + entry = *pentry; + for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { + cmp = name_index - entry->e_name_index; + if (!cmp) + cmp = name_len - entry->e_name_len; + if (!cmp) + cmp = memcmp(name, entry->e_name, name_len); + if (cmp <= 0 && (sorted || cmp == 0)) + break; + } + *pentry = entry; + if (!cmp && ext4_xattr_check_entry(entry, size)) + return -EIO; + return cmp ? -ENODATA : 0; +} + +static int +ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct buffer_head *bh = NULL; + struct ext4_xattr_entry *entry; + size_t size; + int error; + + ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", + name_index, name, buffer, (long)buffer_size); + + error = -ENODATA; + if (!EXT4_I(inode)->i_file_acl) + goto cleanup; + ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); + if (!bh) + goto cleanup; + ea_bdebug(bh, "b_count=%d, refcount=%d", + atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); + if (ext4_xattr_check_block(bh)) { +bad_block: ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + ext4_xattr_cache_insert(bh); + entry = BFIRST(bh); + error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); + if (error == -EIO) + goto bad_block; + if (error) + goto cleanup; + size = le32_to_cpu(entry->e_value_size); + if (buffer) { + error = -ERANGE; + if (size > buffer_size) + goto cleanup; + memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), + size); + } + error = size; + +cleanup: + brelse(bh); + return error; +} + +static int +ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry; + struct ext4_inode *raw_inode; + struct ext4_iloc iloc; + size_t size; + void *end; + int error; + + if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) + return -ENODATA; + error = ext4_get_inode_loc(inode, &iloc); + if (error) + return error; + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + error = ext4_xattr_check_names(entry, end); + if (error) + goto cleanup; + error = ext4_xattr_find_entry(&entry, name_index, name, + end - (void *)entry, 0); + if (error) + goto cleanup; + size = le32_to_cpu(entry->e_value_size); + if (buffer) { + error = -ERANGE; + if (size > buffer_size) + goto cleanup; + memcpy(buffer, (void *)IFIRST(header) + + le16_to_cpu(entry->e_value_offs), size); + } + error = size; + +cleanup: + brelse(iloc.bh); + return error; +} + +/* + * ext4_xattr_get() + * + * Copy an extended attribute into the buffer + * provided, or compute the buffer size required. + * Buffer is NULL to compute the size of the buffer required. + * + * Returns a negative error number on failure, or the number of bytes + * used / required on success. + */ +int +ext4_xattr_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + int error; + + down_read(&EXT4_I(inode)->xattr_sem); + error = ext4_xattr_ibody_get(inode, name_index, name, buffer, + buffer_size); + if (error == -ENODATA) + error = ext4_xattr_block_get(inode, name_index, name, buffer, + buffer_size); + up_read(&EXT4_I(inode)->xattr_sem); + return error; +} + +static int +ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, + char *buffer, size_t buffer_size) +{ + size_t rest = buffer_size; + + for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { + struct xattr_handler *handler = + ext4_xattr_handler(entry->e_name_index); + + if (handler) { + size_t size = handler->list(inode, buffer, rest, + entry->e_name, + entry->e_name_len); + if (buffer) { + if (size > rest) + return -ERANGE; + buffer += size; + } + rest -= size; + } + } + return buffer_size - rest; +} + +static int +ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) +{ + struct buffer_head *bh = NULL; + int error; + + ea_idebug(inode, "buffer=%p, buffer_size=%ld", + buffer, (long)buffer_size); + + error = 0; + if (!EXT4_I(inode)->i_file_acl) + goto cleanup; + ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + ea_bdebug(bh, "b_count=%d, refcount=%d", + atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); + if (ext4_xattr_check_block(bh)) { + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + ext4_xattr_cache_insert(bh); + error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); + +cleanup: + brelse(bh); + + return error; +} + +static int +ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_inode *raw_inode; + struct ext4_iloc iloc; + void *end; + int error; + + if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) + return 0; + error = ext4_get_inode_loc(inode, &iloc); + if (error) + return error; + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + error = ext4_xattr_check_names(IFIRST(header), end); + if (error) + goto cleanup; + error = ext4_xattr_list_entries(inode, IFIRST(header), + buffer, buffer_size); + +cleanup: + brelse(iloc.bh); + return error; +} + +/* + * ext4_xattr_list() + * + * Copy a list of attribute names into the buffer + * provided, or compute the buffer size required. + * Buffer is NULL to compute the size of the buffer required. + * + * Returns a negative error number on failure, or the number of bytes + * used / required on success. + */ +int +ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +{ + int i_error, b_error; + + down_read(&EXT4_I(inode)->xattr_sem); + i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); + if (i_error < 0) { + b_error = 0; + } else { + if (buffer) { + buffer += i_error; + buffer_size -= i_error; + } + b_error = ext4_xattr_block_list(inode, buffer, buffer_size); + if (b_error < 0) + i_error = 0; + } + up_read(&EXT4_I(inode)->xattr_sem); + return i_error + b_error; +} + +/* + * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is + * not set, set it. + */ +static void ext4_xattr_update_super_block(handle_t *handle, + struct super_block *sb) +{ + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) + return; + + lock_super(sb); + if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { + EXT4_SB(sb)->s_es->s_feature_compat |= + cpu_to_le32(EXT4_FEATURE_COMPAT_EXT_ATTR); + sb->s_dirt = 1; + ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + } + unlock_super(sb); +} + +/* + * Release the xattr block BH: If the reference count is > 1, decrement + * it; otherwise free the block. + */ +static void +ext4_xattr_release_block(handle_t *handle, struct inode *inode, + struct buffer_head *bh) +{ + struct mb_cache_entry *ce = NULL; + + ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); + if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { + ea_bdebug(bh, "refcount now=0; freeing"); + if (ce) + mb_cache_entry_free(ce); + ext4_free_blocks(handle, inode, bh->b_blocknr, 1); + get_bh(bh); + ext4_forget(handle, 1, inode, bh, bh->b_blocknr); + } else { + if (ext4_journal_get_write_access(handle, bh) == 0) { + lock_buffer(bh); + BHDR(bh)->h_refcount = cpu_to_le32( + le32_to_cpu(BHDR(bh)->h_refcount) - 1); + ext4_journal_dirty_metadata(handle, bh); + if (IS_SYNC(inode)) + handle->h_sync = 1; + DQUOT_FREE_BLOCK(inode, 1); + unlock_buffer(bh); + ea_bdebug(bh, "refcount now=%d; releasing", + le32_to_cpu(BHDR(bh)->h_refcount)); + } + if (ce) + mb_cache_entry_release(ce); + } +} + +struct ext4_xattr_info { + int name_index; + const char *name; + const void *value; + size_t value_len; +}; + +struct ext4_xattr_search { + struct ext4_xattr_entry *first; + void *base; + void *end; + struct ext4_xattr_entry *here; + int not_found; +}; + +static int +ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) +{ + struct ext4_xattr_entry *last; + size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); + + /* Compute min_offs and last. */ + last = s->first; + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + if (!last->e_value_block && last->e_value_size) { + size_t offs = le16_to_cpu(last->e_value_offs); + if (offs < min_offs) + min_offs = offs; + } + } + free = min_offs - ((void *)last - s->base) - sizeof(__u32); + if (!s->not_found) { + if (!s->here->e_value_block && s->here->e_value_size) { + size_t size = le32_to_cpu(s->here->e_value_size); + free += EXT4_XATTR_SIZE(size); + } + free += EXT4_XATTR_LEN(name_len); + } + if (i->value) { + if (free < EXT4_XATTR_SIZE(i->value_len) || + free < EXT4_XATTR_LEN(name_len) + + EXT4_XATTR_SIZE(i->value_len)) + return -ENOSPC; + } + + if (i->value && s->not_found) { + /* Insert the new name. */ + size_t size = EXT4_XATTR_LEN(name_len); + size_t rest = (void *)last - (void *)s->here + sizeof(__u32); + memmove((void *)s->here + size, s->here, rest); + memset(s->here, 0, size); + s->here->e_name_index = i->name_index; + s->here->e_name_len = name_len; + memcpy(s->here->e_name, i->name, name_len); + } else { + if (!s->here->e_value_block && s->here->e_value_size) { + void *first_val = s->base + min_offs; + size_t offs = le16_to_cpu(s->here->e_value_offs); + void *val = s->base + offs; + size_t size = EXT4_XATTR_SIZE( + le32_to_cpu(s->here->e_value_size)); + + if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) { + /* The old and the new value have the same + size. Just replace. */ + s->here->e_value_size = + cpu_to_le32(i->value_len); + memset(val + size - EXT4_XATTR_PAD, 0, + EXT4_XATTR_PAD); /* Clear pad bytes. */ + memcpy(val, i->value, i->value_len); + return 0; + } + + /* Remove the old value. */ + memmove(first_val + size, first_val, val - first_val); + memset(first_val, 0, size); + s->here->e_value_size = 0; + s->here->e_value_offs = 0; + min_offs += size; + + /* Adjust all value offsets. */ + last = s->first; + while (!IS_LAST_ENTRY(last)) { + size_t o = le16_to_cpu(last->e_value_offs); + if (!last->e_value_block && + last->e_value_size && o < offs) + last->e_value_offs = + cpu_to_le16(o + size); + last = EXT4_XATTR_NEXT(last); + } + } + if (!i->value) { + /* Remove the old name. */ + size_t size = EXT4_XATTR_LEN(name_len); + last = ENTRY((void *)last - size); + memmove(s->here, (void *)s->here + size, + (void *)last - (void *)s->here + sizeof(__u32)); + memset(last, 0, size); + } + } + + if (i->value) { + /* Insert the new value. */ + s->here->e_value_size = cpu_to_le32(i->value_len); + if (i->value_len) { + size_t size = EXT4_XATTR_SIZE(i->value_len); + void *val = s->base + min_offs - size; + s->here->e_value_offs = cpu_to_le16(min_offs - size); + memset(val + size - EXT4_XATTR_PAD, 0, + EXT4_XATTR_PAD); /* Clear the pad bytes. */ + memcpy(val, i->value, i->value_len); + } + } + return 0; +} + +struct ext4_xattr_block_find { + struct ext4_xattr_search s; + struct buffer_head *bh; +}; + +static int +ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, + struct ext4_xattr_block_find *bs) +{ + struct super_block *sb = inode->i_sb; + int error; + + ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", + i->name_index, i->name, i->value, (long)i->value_len); + + if (EXT4_I(inode)->i_file_acl) { + /* The inode already has an extended attribute block. */ + bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl); + error = -EIO; + if (!bs->bh) + goto cleanup; + ea_bdebug(bs->bh, "b_count=%d, refcount=%d", + atomic_read(&(bs->bh->b_count)), + le32_to_cpu(BHDR(bs->bh)->h_refcount)); + if (ext4_xattr_check_block(bs->bh)) { + ext4_error(sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + /* Find the named attribute. */ + bs->s.base = BHDR(bs->bh); + bs->s.first = BFIRST(bs->bh); + bs->s.end = bs->bh->b_data + bs->bh->b_size; + bs->s.here = bs->s.first; + error = ext4_xattr_find_entry(&bs->s.here, i->name_index, + i->name, bs->bh->b_size, 1); + if (error && error != -ENODATA) + goto cleanup; + bs->s.not_found = error; + } + error = 0; + +cleanup: + return error; +} + +static int +ext4_xattr_block_set(handle_t *handle, struct inode *inode, + struct ext4_xattr_info *i, + struct ext4_xattr_block_find *bs) +{ + struct super_block *sb = inode->i_sb; + struct buffer_head *new_bh = NULL; + struct ext4_xattr_search *s = &bs->s; + struct mb_cache_entry *ce = NULL; + int error; + +#define header(x) ((struct ext4_xattr_header *)(x)) + + if (i->value && i->value_len > sb->s_blocksize) + return -ENOSPC; + if (s->base) { + ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, + bs->bh->b_blocknr); + if (header(s->base)->h_refcount == cpu_to_le32(1)) { + if (ce) { + mb_cache_entry_free(ce); + ce = NULL; + } + ea_bdebug(bs->bh, "modifying in-place"); + error = ext4_journal_get_write_access(handle, bs->bh); + if (error) + goto cleanup; + lock_buffer(bs->bh); + error = ext4_xattr_set_entry(i, s); + if (!error) { + if (!IS_LAST_ENTRY(s->first)) + ext4_xattr_rehash(header(s->base), + s->here); + ext4_xattr_cache_insert(bs->bh); + } + unlock_buffer(bs->bh); + if (error == -EIO) + goto bad_block; + if (!error) + error = ext4_journal_dirty_metadata(handle, + bs->bh); + if (error) + goto cleanup; + goto inserted; + } else { + int offset = (char *)s->here - bs->bh->b_data; + + if (ce) { + mb_cache_entry_release(ce); + ce = NULL; + } + ea_bdebug(bs->bh, "cloning"); + s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); + error = -ENOMEM; + if (s->base == NULL) + goto cleanup; + memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); + s->first = ENTRY(header(s->base)+1); + header(s->base)->h_refcount = cpu_to_le32(1); + s->here = ENTRY(s->base + offset); + s->end = s->base + bs->bh->b_size; + } + } else { + /* Allocate a buffer where we construct the new block. */ + s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); + /* assert(header == s->base) */ + error = -ENOMEM; + if (s->base == NULL) + goto cleanup; + memset(s->base, 0, sb->s_blocksize); + header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); + header(s->base)->h_blocks = cpu_to_le32(1); + header(s->base)->h_refcount = cpu_to_le32(1); + s->first = ENTRY(header(s->base)+1); + s->here = ENTRY(header(s->base)+1); + s->end = s->base + sb->s_blocksize; + } + + error = ext4_xattr_set_entry(i, s); + if (error == -EIO) + goto bad_block; + if (error) + goto cleanup; + if (!IS_LAST_ENTRY(s->first)) + ext4_xattr_rehash(header(s->base), s->here); + +inserted: + if (!IS_LAST_ENTRY(s->first)) { + new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce); + if (new_bh) { + /* We found an identical block in the cache. */ + if (new_bh == bs->bh) + ea_bdebug(new_bh, "keeping"); + else { + /* The old block is released after updating + the inode. */ + error = -EDQUOT; + if (DQUOT_ALLOC_BLOCK(inode, 1)) + goto cleanup; + error = ext4_journal_get_write_access(handle, + new_bh); + if (error) + goto cleanup_dquot; + lock_buffer(new_bh); + BHDR(new_bh)->h_refcount = cpu_to_le32(1 + + le32_to_cpu(BHDR(new_bh)->h_refcount)); + ea_bdebug(new_bh, "reusing; refcount now=%d", + le32_to_cpu(BHDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); + error = ext4_journal_dirty_metadata(handle, + new_bh); + if (error) + goto cleanup_dquot; + } + mb_cache_entry_release(ce); + ce = NULL; + } else if (bs->bh && s->base == bs->bh->b_data) { + /* We were modifying this block in-place. */ + ea_bdebug(bs->bh, "keeping this block"); + new_bh = bs->bh; + get_bh(new_bh); + } else { + /* We need to allocate a new block */ + ext4_fsblk_t goal = le32_to_cpu( + EXT4_SB(sb)->s_es->s_first_data_block) + + (ext4_fsblk_t)EXT4_I(inode)->i_block_group * + EXT4_BLOCKS_PER_GROUP(sb); + ext4_fsblk_t block = ext4_new_block(handle, inode, + goal, &error); + if (error) + goto cleanup; + ea_idebug(inode, "creating block %d", block); + + new_bh = sb_getblk(sb, block); + if (!new_bh) { +getblk_failed: + ext4_free_blocks(handle, inode, block, 1); + error = -EIO; + goto cleanup; + } + lock_buffer(new_bh); + error = ext4_journal_get_create_access(handle, new_bh); + if (error) { + unlock_buffer(new_bh); + goto getblk_failed; + } + memcpy(new_bh->b_data, s->base, new_bh->b_size); + set_buffer_uptodate(new_bh); + unlock_buffer(new_bh); + ext4_xattr_cache_insert(new_bh); + error = ext4_journal_dirty_metadata(handle, new_bh); + if (error) + goto cleanup; + } + } + + /* Update the inode. */ + EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; + + /* Drop the previous xattr block. */ + if (bs->bh && bs->bh != new_bh) + ext4_xattr_release_block(handle, inode, bs->bh); + error = 0; + +cleanup: + if (ce) + mb_cache_entry_release(ce); + brelse(new_bh); + if (!(bs->bh && s->base == bs->bh->b_data)) + kfree(s->base); + + return error; + +cleanup_dquot: + DQUOT_FREE_BLOCK(inode, 1); + goto cleanup; + +bad_block: + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + goto cleanup; + +#undef header +} + +struct ext4_xattr_ibody_find { + struct ext4_xattr_search s; + struct ext4_iloc iloc; +}; + +static int +ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, + struct ext4_xattr_ibody_find *is) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_inode *raw_inode; + int error; + + if (EXT4_I(inode)->i_extra_isize == 0) + return 0; + raw_inode = ext4_raw_inode(&is->iloc); + header = IHDR(inode, raw_inode); + is->s.base = is->s.first = IFIRST(header); + is->s.here = is->s.first; + is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { + error = ext4_xattr_check_names(IFIRST(header), is->s.end); + if (error) + return error; + /* Find the named attribute. */ + error = ext4_xattr_find_entry(&is->s.here, i->name_index, + i->name, is->s.end - + (void *)is->s.base, 0); + if (error && error != -ENODATA) + return error; + is->s.not_found = error; + } + return 0; +} + +static int +ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, + struct ext4_xattr_info *i, + struct ext4_xattr_ibody_find *is) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_search *s = &is->s; + int error; + + if (EXT4_I(inode)->i_extra_isize == 0) + return -ENOSPC; + error = ext4_xattr_set_entry(i, s); + if (error) + return error; + header = IHDR(inode, ext4_raw_inode(&is->iloc)); + if (!IS_LAST_ENTRY(s->first)) { + header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); + EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; + } else { + header->h_magic = cpu_to_le32(0); + EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; + } + return 0; +} + +/* + * ext4_xattr_set_handle() + * + * Create, replace or remove an extended attribute for this inode. Buffer + * is NULL to remove an existing extended attribute, and non-NULL to + * either replace an existing extended attribute, or create a new extended + * attribute. The flags XATTR_REPLACE and XATTR_CREATE + * specify that an extended attribute must exist and must not exist + * previous to the call, respectively. + * + * Returns 0, or a negative error number on failure. + */ +int +ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + int flags) +{ + struct ext4_xattr_info i = { + .name_index = name_index, + .name = name, + .value = value, + .value_len = value_len, + + }; + struct ext4_xattr_ibody_find is = { + .s = { .not_found = -ENODATA, }, + }; + struct ext4_xattr_block_find bs = { + .s = { .not_found = -ENODATA, }, + }; + int error; + + if (!name) + return -EINVAL; + if (strlen(name) > 255) + return -ERANGE; + down_write(&EXT4_I(inode)->xattr_sem); + error = ext4_get_inode_loc(inode, &is.iloc); + if (error) + goto cleanup; + + if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { + struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); + memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; + } + + error = ext4_xattr_ibody_find(inode, &i, &is); + if (error) + goto cleanup; + if (is.s.not_found) + error = ext4_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + if (is.s.not_found && bs.s.not_found) { + error = -ENODATA; + if (flags & XATTR_REPLACE) + goto cleanup; + error = 0; + if (!value) + goto cleanup; + } else { + error = -EEXIST; + if (flags & XATTR_CREATE) + goto cleanup; + } + error = ext4_journal_get_write_access(handle, is.iloc.bh); + if (error) + goto cleanup; + if (!value) { + if (!is.s.not_found) + error = ext4_xattr_ibody_set(handle, inode, &i, &is); + else if (!bs.s.not_found) + error = ext4_xattr_block_set(handle, inode, &i, &bs); + } else { + error = ext4_xattr_ibody_set(handle, inode, &i, &is); + if (!error && !bs.s.not_found) { + i.value = NULL; + error = ext4_xattr_block_set(handle, inode, &i, &bs); + } else if (error == -ENOSPC) { + error = ext4_xattr_block_set(handle, inode, &i, &bs); + if (error) + goto cleanup; + if (!is.s.not_found) { + i.value = NULL; + error = ext4_xattr_ibody_set(handle, inode, &i, + &is); + } + } + } + if (!error) { + ext4_xattr_update_super_block(handle, inode->i_sb); + inode->i_ctime = CURRENT_TIME_SEC; + error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); + /* + * The bh is consumed by ext4_mark_iloc_dirty, even with + * error != 0. + */ + is.iloc.bh = NULL; + if (IS_SYNC(inode)) + handle->h_sync = 1; + } + +cleanup: + brelse(is.iloc.bh); + brelse(bs.bh); + up_write(&EXT4_I(inode)->xattr_sem); + return error; +} + +/* + * ext4_xattr_set() + * + * Like ext4_xattr_set_handle, but start from an inode. This extended + * attribute modification is a filesystem transaction by itself. + * + * Returns 0, or a negative error number on failure. + */ +int +ext4_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, int flags) +{ + handle_t *handle; + int error, retries = 0; + +retry: + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + } else { + int error2; + + error = ext4_xattr_set_handle(handle, inode, name_index, name, + value, value_len, flags); + error2 = ext4_journal_stop(handle); + if (error == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + if (error == 0) + error = error2; + } + + return error; +} + +/* + * ext4_xattr_delete_inode() + * + * Free extended attribute resources associated with this inode. This + * is called immediately before an inode is freed. We have exclusive + * access to the inode. + */ +void +ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) +{ + struct buffer_head *bh = NULL; + + if (!EXT4_I(inode)->i_file_acl) + goto cleanup; + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); + if (!bh) { + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: block %llu read error", inode->i_ino, + EXT4_I(inode)->i_file_acl); + goto cleanup; + } + if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || + BHDR(bh)->h_blocks != cpu_to_le32(1)) { + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + goto cleanup; + } + ext4_xattr_release_block(handle, inode, bh); + EXT4_I(inode)->i_file_acl = 0; + +cleanup: + brelse(bh); +} + +/* + * ext4_xattr_put_super() + * + * This is called when a file system is unmounted. + */ +void +ext4_xattr_put_super(struct super_block *sb) +{ + mb_cache_shrink(sb->s_bdev); +} + +/* + * ext4_xattr_cache_insert() + * + * Create a new entry in the extended attribute cache, and insert + * it unless such an entry is already in the cache. + * + * Returns 0, or a negative error number on failure. + */ +static void +ext4_xattr_cache_insert(struct buffer_head *bh) +{ + __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); + struct mb_cache_entry *ce; + int error; + + ce = mb_cache_entry_alloc(ext4_xattr_cache); + if (!ce) { + ea_bdebug(bh, "out of memory"); + return; + } + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); + if (error) { + mb_cache_entry_free(ce); + if (error == -EBUSY) { + ea_bdebug(bh, "already in cache"); + error = 0; + } + } else { + ea_bdebug(bh, "inserting [%x]", (int)hash); + mb_cache_entry_release(ce); + } +} + +/* + * ext4_xattr_cmp() + * + * Compare two extended attribute blocks for equality. + * + * Returns 0 if the blocks are equal, 1 if they differ, and + * a negative error number on errors. + */ +static int +ext4_xattr_cmp(struct ext4_xattr_header *header1, + struct ext4_xattr_header *header2) +{ + struct ext4_xattr_entry *entry1, *entry2; + + entry1 = ENTRY(header1+1); + entry2 = ENTRY(header2+1); + while (!IS_LAST_ENTRY(entry1)) { + if (IS_LAST_ENTRY(entry2)) + return 1; + if (entry1->e_hash != entry2->e_hash || + entry1->e_name_index != entry2->e_name_index || + entry1->e_name_len != entry2->e_name_len || + entry1->e_value_size != entry2->e_value_size || + memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) + return 1; + if (entry1->e_value_block != 0 || entry2->e_value_block != 0) + return -EIO; + if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), + (char *)header2 + le16_to_cpu(entry2->e_value_offs), + le32_to_cpu(entry1->e_value_size))) + return 1; + + entry1 = EXT4_XATTR_NEXT(entry1); + entry2 = EXT4_XATTR_NEXT(entry2); + } + if (!IS_LAST_ENTRY(entry2)) + return 1; + return 0; +} + +/* + * ext4_xattr_cache_find() + * + * Find an identical extended attribute block. + * + * Returns a pointer to the block found, or NULL if such a block was + * not found or an error occurred. + */ +static struct buffer_head * +ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, + struct mb_cache_entry **pce) +{ + __u32 hash = le32_to_cpu(header->h_hash); + struct mb_cache_entry *ce; + + if (!header->h_hash) + return NULL; /* never share */ + ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); +again: + ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, + inode->i_sb->s_bdev, hash); + while (ce) { + struct buffer_head *bh; + + if (IS_ERR(ce)) { + if (PTR_ERR(ce) == -EAGAIN) + goto again; + break; + } + bh = sb_bread(inode->i_sb, ce->e_block); + if (!bh) { + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: block %lu read error", + inode->i_ino, (unsigned long) ce->e_block); + } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= + EXT4_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %lu refcount %d>=%d", + (unsigned long) ce->e_block, + le32_to_cpu(BHDR(bh)->h_refcount), + EXT4_XATTR_REFCOUNT_MAX); + } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { + *pce = ce; + return bh; + } + brelse(bh); + ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); + } + return NULL; +} + +#define NAME_HASH_SHIFT 5 +#define VALUE_HASH_SHIFT 16 + +/* + * ext4_xattr_hash_entry() + * + * Compute the hash of an extended attribute. + */ +static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, + struct ext4_xattr_entry *entry) +{ + __u32 hash = 0; + char *name = entry->e_name; + int n; + + for (n=0; n < entry->e_name_len; n++) { + hash = (hash << NAME_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ + *name++; + } + + if (entry->e_value_block == 0 && entry->e_value_size != 0) { + __le32 *value = (__le32 *)((char *)header + + le16_to_cpu(entry->e_value_offs)); + for (n = (le32_to_cpu(entry->e_value_size) + + EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) { + hash = (hash << VALUE_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ + le32_to_cpu(*value++); + } + } + entry->e_hash = cpu_to_le32(hash); +} + +#undef NAME_HASH_SHIFT +#undef VALUE_HASH_SHIFT + +#define BLOCK_HASH_SHIFT 16 + +/* + * ext4_xattr_rehash() + * + * Re-compute the extended attribute hash value after an entry has changed. + */ +static void ext4_xattr_rehash(struct ext4_xattr_header *header, + struct ext4_xattr_entry *entry) +{ + struct ext4_xattr_entry *here; + __u32 hash = 0; + + ext4_xattr_hash_entry(header, entry); + here = ENTRY(header+1); + while (!IS_LAST_ENTRY(here)) { + if (!here->e_hash) { + /* Block is not shared if an entry's hash value == 0 */ + hash = 0; + break; + } + hash = (hash << BLOCK_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ + le32_to_cpu(here->e_hash); + here = EXT4_XATTR_NEXT(here); + } + header->h_hash = cpu_to_le32(hash); +} + +#undef BLOCK_HASH_SHIFT + +int __init +init_ext4_xattr(void) +{ + ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, + sizeof(struct mb_cache_entry) + + sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); + if (!ext4_xattr_cache) + return -ENOMEM; + return 0; +} + +void +exit_ext4_xattr(void) +{ + if (ext4_xattr_cache) + mb_cache_destroy(ext4_xattr_cache); + ext4_xattr_cache = NULL; +} diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h new file mode 100644 index 000000000000..79432b35398f --- /dev/null +++ b/fs/ext4/xattr.h @@ -0,0 +1,145 @@ +/* + File: fs/ext4/xattr.h + + On-disk format of extended attributes for the ext4 filesystem. + + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#include <linux/xattr.h> + +/* Magic value in attribute blocks */ +#define EXT4_XATTR_MAGIC 0xEA020000 + +/* Maximum number of references to one attribute block */ +#define EXT4_XATTR_REFCOUNT_MAX 1024 + +/* Name indexes */ +#define EXT4_XATTR_INDEX_USER 1 +#define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define EXT4_XATTR_INDEX_TRUSTED 4 +#define EXT4_XATTR_INDEX_LUSTRE 5 +#define EXT4_XATTR_INDEX_SECURITY 6 + +struct ext4_xattr_header { + __le32 h_magic; /* magic number for identification */ + __le32 h_refcount; /* reference count */ + __le32 h_blocks; /* number of disk blocks used */ + __le32 h_hash; /* hash value of all attributes */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct ext4_xattr_ibody_header { + __le32 h_magic; /* magic number for identification */ +}; + +struct ext4_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __le16 e_value_offs; /* offset in disk block of value */ + __le32 e_value_block; /* disk block attribute is stored on (n/i) */ + __le32 e_value_size; /* size of attribute value */ + __le32 e_hash; /* hash value of name and value */ + char e_name[0]; /* attribute name */ +}; + +#define EXT4_XATTR_PAD_BITS 2 +#define EXT4_XATTR_PAD (1<<EXT4_XATTR_PAD_BITS) +#define EXT4_XATTR_ROUND (EXT4_XATTR_PAD-1) +#define EXT4_XATTR_LEN(name_len) \ + (((name_len) + EXT4_XATTR_ROUND + \ + sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) +#define EXT4_XATTR_NEXT(entry) \ + ( (struct ext4_xattr_entry *)( \ + (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) +#define EXT4_XATTR_SIZE(size) \ + (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) + +# ifdef CONFIG_EXT4DEV_FS_XATTR + +extern struct xattr_handler ext4_xattr_user_handler; +extern struct xattr_handler ext4_xattr_trusted_handler; +extern struct xattr_handler ext4_xattr_acl_access_handler; +extern struct xattr_handler ext4_xattr_acl_default_handler; +extern struct xattr_handler ext4_xattr_security_handler; + +extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); + +extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); +extern int ext4_xattr_list(struct inode *, char *, size_t); +extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); +extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); + +extern void ext4_xattr_delete_inode(handle_t *, struct inode *); +extern void ext4_xattr_put_super(struct super_block *); + +extern int init_ext4_xattr(void); +extern void exit_ext4_xattr(void); + +extern struct xattr_handler *ext4_xattr_handlers[]; + +# else /* CONFIG_EXT4DEV_FS_XATTR */ + +static inline int +ext4_xattr_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static inline int +ext4_xattr_list(struct inode *inode, void *buffer, size_t size) +{ + return -EOPNOTSUPP; +} + +static inline int +ext4_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static inline int +ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + const char *name, const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static inline void +ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) +{ +} + +static inline void +ext4_xattr_put_super(struct super_block *sb) +{ +} + +static inline int +init_ext4_xattr(void) +{ + return 0; +} + +static inline void +exit_ext4_xattr(void) +{ +} + +#define ext4_xattr_handlers NULL + +# endif /* CONFIG_EXT4DEV_FS_XATTR */ + +#ifdef CONFIG_EXT4DEV_FS_SECURITY +extern int ext4_init_security(handle_t *handle, struct inode *inode, + struct inode *dir); +#else +static inline int ext4_init_security(handle_t *handle, struct inode *inode, + struct inode *dir) +{ + return 0; +} +#endif diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c new file mode 100644 index 000000000000..b6a6861951f9 --- /dev/null +++ b/fs/ext4/xattr_security.c @@ -0,0 +1,77 @@ +/* + * linux/fs/ext4/xattr_security.c + * Handler for storing security labels as extended attributes. + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext4_jbd2.h> +#include <linux/ext4_fs.h> +#include <linux/security.h> +#include "xattr.h" + +static size_t +ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + + if (list && total_len <= list_size) { + memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext4_xattr_security_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, + buffer, size); +} + +static int +ext4_xattr_security_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, + value, size, flags); +} + +int +ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) +{ + int err; + size_t len; + void *value; + char *name; + + err = security_inode_init_security(inode, dir, &name, &value, &len); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_SECURITY, + name, value, len, 0); + kfree(name); + kfree(value); + return err; +} + +struct xattr_handler ext4_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = ext4_xattr_security_list, + .get = ext4_xattr_security_get, + .set = ext4_xattr_security_set, +}; diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c new file mode 100644 index 000000000000..b76f2dbc82da --- /dev/null +++ b/fs/ext4/xattr_trusted.c @@ -0,0 +1,62 @@ +/* + * linux/fs/ext4/xattr_trusted.c + * Handler for trusted extended attributes. + * + * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext4_jbd2.h> +#include <linux/ext4_fs.h> +#include "xattr.h" + +#define XATTR_TRUSTED_PREFIX "trusted." + +static size_t +ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (!capable(CAP_SYS_ADMIN)) + return 0; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext4_xattr_trusted_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, + buffer, size); +} + +static int +ext4_xattr_trusted_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, + value, size, flags); +} + +struct xattr_handler ext4_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .list = ext4_xattr_trusted_list, + .get = ext4_xattr_trusted_get, + .set = ext4_xattr_trusted_set, +}; diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c new file mode 100644 index 000000000000..c53cded0761a --- /dev/null +++ b/fs/ext4/xattr_user.c @@ -0,0 +1,64 @@ +/* + * linux/fs/ext4/xattr_user.c + * Handler for extended user attributes. + * + * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext4_jbd2.h> +#include <linux/ext4_fs.h> +#include "xattr.h" + +#define XATTR_USER_PREFIX "user." + +static size_t +ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (!test_opt(inode->i_sb, XATTR_USER)) + return 0; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_USER_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext4_xattr_user_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); +} + +static int +ext4_xattr_user_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, + value, size, flags); +} + +struct xattr_handler ext4_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .list = ext4_xattr_user_list, + .get = ext4_xattr_user_get, + .set = ext4_xattr_user_set, +}; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 045738032a83..78945b53b0f8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -384,7 +384,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) le16_to_cpu(de->cdate)) + secs; inode->i_ctime.tv_nsec = csecs * 10000000; inode->i_atime.tv_sec = - date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate)); + date_dos2unix(0, le16_to_cpu(de->adate)); inode->i_atime.tv_nsec = 0; } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -1472,7 +1472,7 @@ int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2) ret = writeback_inode(i1); if (!ret && i2) ret = writeback_inode(i2); - if (!ret && sb) { + if (!ret) { struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; ret = filemap_flush(mapping); } diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 1f94dd35a943..cdd1694e889b 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -45,7 +45,7 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, strncpy(buf, table_name, 256); buf[255] = '\0'; - p = strstr(buf, ":"); + p = strchr(buf, ':'); if (!p) { log_info("invalid table_name \"%s\"", table_name); kfree(ls); diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index bcf6ee36e065..7faef8544f32 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -60,14 +60,14 @@ void hpfs_read_inode(struct inode *i) if (hpfs_sb(i->i_sb)->sb_eas) { if ((ea = hpfs_get_ea(i->i_sb, fnode, "UID", &ea_size))) { if (ea_size == 2) { - i->i_uid = le16_to_cpu(*(u16*)ea); + i->i_uid = le16_to_cpu(*(__le16*)ea); hpfs_inode->i_ea_uid = 1; } kfree(ea); } if ((ea = hpfs_get_ea(i->i_sb, fnode, "GID", &ea_size))) { if (ea_size == 2) { - i->i_gid = le16_to_cpu(*(u16*)ea); + i->i_gid = le16_to_cpu(*(__le16*)ea); hpfs_inode->i_ea_gid = 1; } kfree(ea); @@ -87,7 +87,7 @@ void hpfs_read_inode(struct inode *i) int rdev = 0; umode_t mode = hpfs_sb(sb)->sb_mode; if (ea_size == 2) { - mode = le16_to_cpu(*(u16*)ea); + mode = le16_to_cpu(*(__le16*)ea); hpfs_inode->i_ea_mode = 1; } kfree(ea); @@ -95,7 +95,7 @@ void hpfs_read_inode(struct inode *i) if (S_ISBLK(mode) || S_ISCHR(mode)) { if ((ea = hpfs_get_ea(i->i_sb, fnode, "DEV", &ea_size))) { if (ea_size == 4) - rdev = le32_to_cpu(*(u32*)ea); + rdev = le32_to_cpu(*(__le32*)ea); kfree(ea); } } @@ -148,7 +148,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) we'd better not overwrite them hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { - u32 ea; + __le32 ea; if ((i->i_uid != hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { ea = cpu_to_le32(i->i_uid); hpfs_set_ea(i, fnode, "UID", (char*)&ea, 2); @@ -165,6 +165,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) && i->i_mode != ((hpfs_sb(i->i_sb)->sb_mode & ~(S_ISDIR(i->i_mode) ? 0222 : 0333)) | (S_ISDIR(i->i_mode) ? S_IFDIR : S_IFREG))) || hpfs_inode->i_ea_mode) { ea = cpu_to_le32(i->i_mode); + /* sick, but legal */ hpfs_set_ea(i, fnode, "MODE", (char *)&ea, 2); hpfs_inode->i_ea_mode = 1; } diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index dcb6d2e988b8..642675fc394a 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -572,7 +572,7 @@ struct hppfs_dirent { }; static int hppfs_filldir(void *d, const char *name, int size, - loff_t offset, ino_t inode, unsigned int type) + loff_t offset, u64 inode, unsigned int type) { struct hppfs_dirent *dirent = d; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 5e03b2f67b93..4ee3f006b861 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -293,7 +293,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) if (h_vm_pgoff >= h_pgoff) v_offset = 0; - unmap_hugepage_range(vma, + __unmap_hugepage_range(vma, vma->vm_start + v_offset, vma->vm_end); } } diff --git a/fs/inode.c b/fs/inode.c index bf6bec4e54ff..d9a21d122926 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -162,7 +162,7 @@ static struct inode *alloc_inode(struct super_block *sb) bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; mapping->backing_dev_info = bdi; } - inode->i_private = 0; + inode->i_private = NULL; inode->i_mapping = mapping; } return inode; diff --git a/fs/ioprio.c b/fs/ioprio.c index 6dc6721d9e82..89e8da112a75 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -150,11 +150,6 @@ int ioprio_best(unsigned short aprio, unsigned short bprio) unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); - if (!ioprio_valid(aprio)) - return bprio; - if (!ioprio_valid(bprio)) - return aprio; - if (aclass == IOPRIO_CLASS_NONE) aclass = IOPRIO_CLASS_BE; if (bclass == IOPRIO_CLASS_NONE) diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index 81a90e170ac3..fb8fe7a9ddc6 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c @@ -14,9 +14,9 @@ * Convert Unicode 16 to UTF-8 or ASCII. */ static int -uni16_to_x8(unsigned char *ascii, u16 *uni, int len, struct nls_table *nls) +uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) { - wchar_t *ip, ch; + __be16 *ip, ch; unsigned char *op; ip = uni; @@ -24,8 +24,8 @@ uni16_to_x8(unsigned char *ascii, u16 *uni, int len, struct nls_table *nls) while ((ch = get_unaligned(ip)) && len) { int llen; - ch = be16_to_cpu(ch); - if ((llen = nls->uni2char(ch, op, NLS_MAX_CHARSET_SIZE)) > 0) + llen = nls->uni2char(be16_to_cpu(ch), op, NLS_MAX_CHARSET_SIZE); + if (llen > 0) op += llen; else *op++ = '?'; @@ -82,7 +82,7 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st len = wcsntombs_be(outname, de->name, de->name_len[0] >> 1, PAGE_SIZE); } else { - len = uni16_to_x8(outname, (u16 *) de->name, + len = uni16_to_x8(outname, (__be16 *) de->name, de->name_len[0] >> 1, nls); } if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) { diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index c518dd8fe60a..b85c686b60db 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -725,6 +725,7 @@ journal_t * journal_init_dev(struct block_device *bdev, __FUNCTION__); kfree(journal); journal = NULL; + goto out; } journal->j_dev = bdev; journal->j_fs_dev = fs_dev; @@ -735,7 +736,7 @@ journal_t * journal_init_dev(struct block_device *bdev, J_ASSERT(bh != NULL); journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; - +out: return journal; } diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile new file mode 100644 index 000000000000..802a3413872a --- /dev/null +++ b/fs/jbd2/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux journaling routines. +# + +obj-$(CONFIG_JBD2) += jbd2.o + +jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c new file mode 100644 index 000000000000..68039fa9a566 --- /dev/null +++ b/fs/jbd2/checkpoint.c @@ -0,0 +1,697 @@ +/* + * linux/fs/checkpoint.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1999 Red Hat Software --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Checkpoint routines for the generic filesystem journaling code. + * Part of the ext2fs journaling system. + * + * Checkpointing is the process of ensuring that a section of the log is + * committed fully to disk, so that that portion of the log can be + * reused. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/errno.h> +#include <linux/slab.h> + +/* + * Unlink a buffer from a transaction checkpoint list. + * + * Called with j_list_lock held. + */ +static inline void __buffer_unlink_first(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + jh->b_cpnext->b_cpprev = jh->b_cpprev; + jh->b_cpprev->b_cpnext = jh->b_cpnext; + if (transaction->t_checkpoint_list == jh) { + transaction->t_checkpoint_list = jh->b_cpnext; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; + } +} + +/* + * Unlink a buffer from a transaction checkpoint(io) list. + * + * Called with j_list_lock held. + */ +static inline void __buffer_unlink(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + if (transaction->t_checkpoint_io_list == jh) { + transaction->t_checkpoint_io_list = jh->b_cpnext; + if (transaction->t_checkpoint_io_list == jh) + transaction->t_checkpoint_io_list = NULL; + } +} + +/* + * Move a buffer from the checkpoint list to the checkpoint io list + * + * Called with j_list_lock held + */ +static inline void __buffer_relink_io(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + + if (!transaction->t_checkpoint_io_list) { + jh->b_cpnext = jh->b_cpprev = jh; + } else { + jh->b_cpnext = transaction->t_checkpoint_io_list; + jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; + jh->b_cpprev->b_cpnext = jh; + jh->b_cpnext->b_cpprev = jh; + } + transaction->t_checkpoint_io_list = jh; +} + +/* + * Try to release a checkpointed buffer from its transaction. + * Returns 1 if we released it and 2 if we also released the + * whole transaction. + * + * Requires j_list_lock + * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it + */ +static int __try_to_free_cp_buf(struct journal_head *jh) +{ + int ret = 0; + struct buffer_head *bh = jh2bh(jh); + + if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { + JBUFFER_TRACE(jh, "remove from checkpoint list"); + ret = __jbd2_journal_remove_checkpoint(jh) + 1; + jbd_unlock_bh_state(bh); + jbd2_journal_remove_journal_head(bh); + BUFFER_TRACE(bh, "release"); + __brelse(bh); + } else { + jbd_unlock_bh_state(bh); + } + return ret; +} + +/* + * __jbd2_log_wait_for_space: wait until there is space in the journal. + * + * Called under j-state_lock *only*. It will be unlocked if we have to wait + * for a checkpoint to free up some space in the log. + */ +void __jbd2_log_wait_for_space(journal_t *journal) +{ + int nblocks; + assert_spin_locked(&journal->j_state_lock); + + nblocks = jbd_space_needed(journal); + while (__jbd2_log_space_left(journal) < nblocks) { + if (journal->j_flags & JBD2_ABORT) + return; + spin_unlock(&journal->j_state_lock); + mutex_lock(&journal->j_checkpoint_mutex); + + /* + * Test again, another process may have checkpointed while we + * were waiting for the checkpoint lock + */ + spin_lock(&journal->j_state_lock); + nblocks = jbd_space_needed(journal); + if (__jbd2_log_space_left(journal) < nblocks) { + spin_unlock(&journal->j_state_lock); + jbd2_log_do_checkpoint(journal); + spin_lock(&journal->j_state_lock); + } + mutex_unlock(&journal->j_checkpoint_mutex); + } +} + +/* + * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. + * The caller must restart a list walk. Wait for someone else to run + * jbd_unlock_bh_state(). + */ +static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) + __releases(journal->j_list_lock) +{ + get_bh(bh); + spin_unlock(&journal->j_list_lock); + jbd_lock_bh_state(bh); + jbd_unlock_bh_state(bh); + put_bh(bh); +} + +/* + * Clean up transaction's list of buffers submitted for io. + * We wait for any pending IO to complete and remove any clean + * buffers. Note that we take the buffers in the opposite ordering + * from the one in which they were submitted for IO. + * + * Called with j_list_lock held. + */ +static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +{ + struct journal_head *jh; + struct buffer_head *bh; + tid_t this_tid; + int released = 0; + + this_tid = transaction->t_tid; +restart: + /* Did somebody clean up the transaction in the meanwhile? */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + return; + while (!released && transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; + bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); + goto restart; + } + if (buffer_locked(bh)) { + atomic_inc(&bh->b_count); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + spin_lock(&journal->j_list_lock); + goto restart; + } + /* + * Now in whatever state the buffer currently is, we know that + * it has been written out and so we can drop it from the list + */ + released = __jbd2_journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + jbd2_journal_remove_journal_head(bh); + __brelse(bh); + } +} + +#define NR_BATCH 64 + +static void +__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) +{ + int i; + + ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) { + struct buffer_head *bh = bhs[i]; + clear_buffer_jwrite(bh); + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + } + *batch_count = 0; +} + +/* + * Try to flush one buffer from the checkpoint list to disk. + * + * Return 1 if something happened which requires us to abort the current + * scan of the checkpoint list. + * + * Called with j_list_lock held and drops it if 1 is returned + * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it + */ +static int __process_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count) +{ + struct buffer_head *bh = jh2bh(jh); + int ret = 0; + + if (buffer_locked(bh)) { + atomic_inc(&bh->b_count); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + ret = 1; + } else if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + jbd2_log_start_commit(journal, tid); + jbd2_log_wait_commit(journal, tid); + ret = 1; + } else if (!buffer_dirty(bh)) { + J_ASSERT_JH(jh, !buffer_jbddirty(bh)); + BUFFER_TRACE(bh, "remove from checkpoint"); + __jbd2_journal_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + jbd2_journal_remove_journal_head(bh); + __brelse(bh); + ret = 1; + } else { + /* + * Important: we are about to write the buffer, and + * possibly block, while still holding the journal lock. + * We cannot afford to let the transaction logic start + * messing around with this buffer before we write it to + * disk, as that would break recoverability. + */ + BUFFER_TRACE(bh, "queue"); + get_bh(bh); + J_ASSERT_BH(bh, !buffer_jwrite(bh)); + set_buffer_jwrite(bh); + bhs[*batch_count] = bh; + __buffer_relink_io(jh); + jbd_unlock_bh_state(bh); + (*batch_count)++; + if (*batch_count == NR_BATCH) { + spin_unlock(&journal->j_list_lock); + __flush_batch(journal, bhs, batch_count); + ret = 1; + } + } + return ret; +} + +/* + * Perform an actual checkpoint. We take the first transaction on the + * list of transactions to be checkpointed and send all its buffers + * to disk. We submit larger chunks of data at once. + * + * The journal should be locked before calling this function. + */ +int jbd2_log_do_checkpoint(journal_t *journal) +{ + transaction_t *transaction; + tid_t this_tid; + int result; + + jbd_debug(1, "Start checkpoint\n"); + + /* + * First thing: if there are any transactions in the log which + * don't need checkpointing, just eliminate them from the + * journal straight away. + */ + result = jbd2_cleanup_journal_tail(journal); + jbd_debug(1, "cleanup_journal_tail returned %d\n", result); + if (result <= 0) + return result; + + /* + * OK, we need to start writing disk blocks. Take one transaction + * and write it. + */ + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) + goto out; + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; +restart: + /* + * If someone cleaned up this transaction while we slept, we're + * done (maybe it's a new transaction, but it fell at the same + * address). + */ + if (journal->j_checkpoint_transactions == transaction && + transaction->t_tid == this_tid) { + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; + struct journal_head *jh; + int retry = 0; + + while (!retry && transaction->t_checkpoint_list) { + struct buffer_head *bh; + + jh = transaction->t_checkpoint_list; + bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + retry = 1; + break; + } + retry = __process_buffer(journal, jh, bhs,&batch_count); + if (!retry && lock_need_resched(&journal->j_list_lock)){ + spin_unlock(&journal->j_list_lock); + retry = 1; + break; + } + } + + if (batch_count) { + if (!retry) { + spin_unlock(&journal->j_list_lock); + retry = 1; + } + __flush_batch(journal, bhs, &batch_count); + } + + if (retry) { + spin_lock(&journal->j_list_lock); + goto restart; + } + /* + * Now we have cleaned up the first transaction's checkpoint + * list. Let's clean up the second one + */ + __wait_cp_io(journal, transaction); + } +out: + spin_unlock(&journal->j_list_lock); + result = jbd2_cleanup_journal_tail(journal); + if (result < 0) + return result; + return 0; +} + +/* + * Check the list of checkpoint transactions for the journal to see if + * we have already got rid of any since the last update of the log tail + * in the journal superblock. If so, we can instantly roll the + * superblock forward to remove those transactions from the log. + * + * Return <0 on error, 0 on success, 1 if there was nothing to clean up. + * + * Called with the journal lock held. + * + * This is the only part of the journaling code which really needs to be + * aware of transaction aborts. Checkpointing involves writing to the + * main filesystem area rather than to the journal, so it can proceed + * even in abort state, but we must not update the journal superblock if + * we have an abort error outstanding. + */ + +int jbd2_cleanup_journal_tail(journal_t *journal) +{ + transaction_t * transaction; + tid_t first_tid; + unsigned long blocknr, freed; + + /* OK, work out the oldest transaction remaining in the log, and + * the log block it starts at. + * + * If the log is now empty, we need to work out which is the + * next transaction ID we will write, and where it will + * start. */ + + spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + transaction = journal->j_checkpoint_transactions; + if (transaction) { + first_tid = transaction->t_tid; + blocknr = transaction->t_log_start; + } else if ((transaction = journal->j_committing_transaction) != NULL) { + first_tid = transaction->t_tid; + blocknr = transaction->t_log_start; + } else if ((transaction = journal->j_running_transaction) != NULL) { + first_tid = transaction->t_tid; + blocknr = journal->j_head; + } else { + first_tid = journal->j_transaction_sequence; + blocknr = journal->j_head; + } + spin_unlock(&journal->j_list_lock); + J_ASSERT(blocknr != 0); + + /* If the oldest pinned transaction is at the tail of the log + already then there's not much we can do right now. */ + if (journal->j_tail_sequence == first_tid) { + spin_unlock(&journal->j_state_lock); + return 1; + } + + /* OK, update the superblock to recover the freed space. + * Physical blocks come first: have we wrapped beyond the end of + * the log? */ + freed = blocknr - journal->j_tail; + if (blocknr < journal->j_tail) + freed = freed + journal->j_last - journal->j_first; + + jbd_debug(1, + "Cleaning journal tail from %d to %d (offset %lu), " + "freeing %lu\n", + journal->j_tail_sequence, first_tid, blocknr, freed); + + journal->j_free += freed; + journal->j_tail_sequence = first_tid; + journal->j_tail = blocknr; + spin_unlock(&journal->j_state_lock); + if (!(journal->j_flags & JBD2_ABORT)) + jbd2_journal_update_superblock(journal, 1); + return 0; +} + + +/* Checkpoint list management */ + +/* + * journal_clean_one_cp_list + * + * Find all the written-back checkpoint buffers in the given list and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of bufers reaped (for debug) + */ + +static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +{ + struct journal_head *last_jh; + struct journal_head *next_jh = jh; + int ret, freed = 0; + + *released = 0; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranking */ + if (jbd_trylock_bh_state(jh2bh(jh))) { + ret = __try_to_free_cp_buf(jh); + if (ret) { + freed++; + if (ret == 2) { + *released = 1; + return freed; + } + } + } + /* + * This function only frees up some memory + * if possible so we dont have an obligation + * to finish processing. Bail out if preemption + * requested: + */ + if (need_resched()) + return freed; + } while (jh != last_jh); + + return freed; +} + +/* + * journal_clean_checkpoint_list + * + * Find all the written-back checkpoint buffers in the journal and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of buffers reaped (for debug) + */ + +int __jbd2_journal_clean_checkpoint_list(journal_t *journal) +{ + transaction_t *transaction, *last_transaction, *next_transaction; + int ret = 0; + int released; + + transaction = journal->j_checkpoint_transactions; + if (!transaction) + goto out; + + last_transaction = transaction->t_cpprev; + next_transaction = transaction; + do { + transaction = next_transaction; + next_transaction = transaction->t_cpnext; + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_list, &released); + /* + * This function only frees up some memory if possible so we + * dont have an obligation to finish processing. Bail out if + * preemption requested: + */ + if (need_resched()) + goto out; + if (released) + continue; + /* + * It is essential that we are as careful as in the case of + * t_checkpoint_list with removing the buffer from the list as + * we can possibly see not yet submitted buffers on io_list + */ + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list, &released); + if (need_resched()) + goto out; + } while (transaction != last_transaction); +out: + return ret; +} + +/* + * journal_remove_checkpoint: called after a buffer has been committed + * to disk (either by being write-back flushed to disk, or being + * committed to the log). + * + * We cannot safely clean a transaction out of the log until all of the + * buffer updates committed in that transaction have safely been stored + * elsewhere on disk. To achieve this, all of the buffers in a + * transaction need to be maintained on the transaction's checkpoint + * lists until they have been rewritten, at which point this function is + * called to remove the buffer from the existing transaction's + * checkpoint lists. + * + * The function returns 1 if it frees the transaction, 0 otherwise. + * + * This function is called with the journal locked. + * This function is called with j_list_lock held. + * This function is called with jbd_lock_bh_state(jh2bh(jh)) + */ + +int __jbd2_journal_remove_checkpoint(struct journal_head *jh) +{ + transaction_t *transaction; + journal_t *journal; + int ret = 0; + + JBUFFER_TRACE(jh, "entry"); + + if ((transaction = jh->b_cp_transaction) == NULL) { + JBUFFER_TRACE(jh, "not on transaction"); + goto out; + } + journal = transaction->t_journal; + + __buffer_unlink(jh); + jh->b_cp_transaction = NULL; + + if (transaction->t_checkpoint_list != NULL || + transaction->t_checkpoint_io_list != NULL) + goto out; + JBUFFER_TRACE(jh, "transaction has no more buffers"); + + /* + * There is one special case to worry about: if we have just pulled the + * buffer off a committing transaction's forget list, then even if the + * checkpoint list is empty, the transaction obviously cannot be + * dropped! + * + * The locking here around j_committing_transaction is a bit sleazy. + * See the comment at the end of jbd2_journal_commit_transaction(). + */ + if (transaction == journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "belongs to committing transaction"); + goto out; + } + + /* OK, that was the last buffer for the transaction: we can now + safely remove this transaction from the log */ + + __jbd2_journal_drop_transaction(journal, transaction); + + /* Just in case anybody was waiting for more transactions to be + checkpointed... */ + wake_up(&journal->j_wait_logspace); + ret = 1; +out: + JBUFFER_TRACE(jh, "exit"); + return ret; +} + +/* + * journal_insert_checkpoint: put a committed buffer onto a checkpoint + * list so that we know when it is safe to clean the transaction out of + * the log. + * + * Called with the journal locked. + * Called with j_list_lock held. + */ +void __jbd2_journal_insert_checkpoint(struct journal_head *jh, + transaction_t *transaction) +{ + JBUFFER_TRACE(jh, "entry"); + J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + + jh->b_cp_transaction = transaction; + + if (!transaction->t_checkpoint_list) { + jh->b_cpnext = jh->b_cpprev = jh; + } else { + jh->b_cpnext = transaction->t_checkpoint_list; + jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev; + jh->b_cpprev->b_cpnext = jh; + jh->b_cpnext->b_cpprev = jh; + } + transaction->t_checkpoint_list = jh; +} + +/* + * We've finished with this transaction structure: adios... + * + * The transaction must have no links except for the checkpoint by this + * point. + * + * Called with the journal locked. + * Called with j_list_lock held. + */ + +void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction) +{ + assert_spin_locked(&journal->j_list_lock); + if (transaction->t_cpnext) { + transaction->t_cpnext->t_cpprev = transaction->t_cpprev; + transaction->t_cpprev->t_cpnext = transaction->t_cpnext; + if (journal->j_checkpoint_transactions == transaction) + journal->j_checkpoint_transactions = + transaction->t_cpnext; + if (journal->j_checkpoint_transactions == transaction) + journal->j_checkpoint_transactions = NULL; + } + + J_ASSERT(transaction->t_state == T_FINISHED); + J_ASSERT(transaction->t_buffers == NULL); + J_ASSERT(transaction->t_sync_datalist == NULL); + J_ASSERT(transaction->t_forget == NULL); + J_ASSERT(transaction->t_iobuf_list == NULL); + J_ASSERT(transaction->t_shadow_list == NULL); + J_ASSERT(transaction->t_log_list == NULL); + J_ASSERT(transaction->t_checkpoint_list == NULL); + J_ASSERT(transaction->t_checkpoint_io_list == NULL); + J_ASSERT(transaction->t_updates == 0); + J_ASSERT(journal->j_committing_transaction != transaction); + J_ASSERT(journal->j_running_transaction != transaction); + + jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); + kfree(transaction); +} diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c new file mode 100644 index 000000000000..70b2ae1ef281 --- /dev/null +++ b/fs/jbd2/commit.c @@ -0,0 +1,920 @@ +/* + * linux/fs/jbd2/commit.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 + * + * Copyright 1998 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Journal commit routines for the generic filesystem journaling code; + * part of the ext2fs journaling system. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> + +/* + * Default IO end handler for temporary BJ_IO buffer_heads. + */ +static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) +{ + BUFFER_TRACE(bh, ""); + if (uptodate) + set_buffer_uptodate(bh); + else + clear_buffer_uptodate(bh); + unlock_buffer(bh); +} + +/* + * When an ext3-ordered file is truncated, it is possible that many pages are + * not sucessfully freed, because they are attached to a committing transaction. + * After the transaction commits, these pages are left on the LRU, with no + * ->mapping, and with attached buffers. These pages are trivially reclaimable + * by the VM, but their apparent absence upsets the VM accounting, and it makes + * the numbers in /proc/meminfo look odd. + * + * So here, we have a buffer which has just come off the forget list. Look to + * see if we can strip all buffers from the backing page. + * + * Called under lock_journal(), and possibly under journal_datalist_lock. The + * caller provided us with a ref against the buffer, and we drop that here. + */ +static void release_buffer_page(struct buffer_head *bh) +{ + struct page *page; + + if (buffer_dirty(bh)) + goto nope; + if (atomic_read(&bh->b_count) != 1) + goto nope; + page = bh->b_page; + if (!page) + goto nope; + if (page->mapping) + goto nope; + + /* OK, it's a truncated page */ + if (TestSetPageLocked(page)) + goto nope; + + page_cache_get(page); + __brelse(bh); + try_to_free_buffers(page); + unlock_page(page); + page_cache_release(page); + return; + +nope: + __brelse(bh); +} + +/* + * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is + * held. For ranking reasons we must trylock. If we lose, schedule away and + * return 0. j_list_lock is dropped in this case. + */ +static int inverted_lock(journal_t *journal, struct buffer_head *bh) +{ + if (!jbd_trylock_bh_state(bh)) { + spin_unlock(&journal->j_list_lock); + schedule(); + return 0; + } + return 1; +} + +/* Done it all: now write the commit record. We should have + * cleaned up our previous buffers by now, so if we are in abort + * mode we can now just skip the rest of the journal write + * entirely. + * + * Returns 1 if the journal needs to be aborted or 0 on success + */ +static int journal_write_commit_record(journal_t *journal, + transaction_t *commit_transaction) +{ + struct journal_head *descriptor; + struct buffer_head *bh; + int i, ret; + int barrier_done = 0; + + if (is_journal_aborted(journal)) + return 0; + + descriptor = jbd2_journal_get_descriptor_buffer(journal); + if (!descriptor) + return 1; + + bh = jh2bh(descriptor); + + /* AKPM: buglet - add `i' to tmp! */ + for (i = 0; i < bh->b_size; i += 512) { + journal_header_t *tmp = (journal_header_t*)bh->b_data; + tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); + tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); + } + + JBUFFER_TRACE(descriptor, "write commit block"); + set_buffer_dirty(bh); + if (journal->j_flags & JBD2_BARRIER) { + set_buffer_ordered(bh); + barrier_done = 1; + } + ret = sync_dirty_buffer(bh); + /* is it possible for another commit to fail at roughly + * the same time as this one? If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP && barrier_done) { + char b[BDEVNAME_SIZE]; + + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + clear_buffer_ordered(bh); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + put_bh(bh); /* One for getblk() */ + jbd2_journal_put_journal_head(descriptor); + + return (ret == -EIO); +} + +static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) +{ + int i; + + for (i = 0; i < bufs; i++) { + wbuf[i]->b_end_io = end_buffer_write_sync; + /* We use-up our safety reference in submit_bh() */ + submit_bh(WRITE, wbuf[i]); + } +} + +/* + * Submit all the data buffers to disk + */ +static void journal_submit_data_buffers(journal_t *journal, + transaction_t *commit_transaction) +{ + struct journal_head *jh; + struct buffer_head *bh; + int locked; + int bufs = 0; + struct buffer_head **wbuf = journal->j_wbuf; + + /* + * Whenever we unlock the journal and sleep, things can get added + * onto ->t_sync_datalist, so we have to keep looping back to + * write_out_data until we *know* that the list is empty. + * + * Cleanup any flushed data buffers from the data list. Even in + * abort mode, we want to flush this out as soon as possible. + */ +write_out_data: + cond_resched(); + spin_lock(&journal->j_list_lock); + + while (commit_transaction->t_sync_datalist) { + jh = commit_transaction->t_sync_datalist; + bh = jh2bh(jh); + locked = 0; + + /* Get reference just to make sure buffer does not disappear + * when we are forced to drop various locks */ + get_bh(bh); + /* If the buffer is dirty, we need to submit IO and hence + * we need the buffer lock. We try to lock the buffer without + * blocking. If we fail, we need to drop j_list_lock and do + * blocking lock_buffer(). + */ + if (buffer_dirty(bh)) { + if (test_set_buffer_locked(bh)) { + BUFFER_TRACE(bh, "needs blocking lock"); + spin_unlock(&journal->j_list_lock); + /* Write out all data to prevent deadlocks */ + journal_do_submit_data(wbuf, bufs); + bufs = 0; + lock_buffer(bh); + spin_lock(&journal->j_list_lock); + } + locked = 1; + } + /* We have to get bh_state lock. Again out of order, sigh. */ + if (!inverted_lock(journal, bh)) { + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + } + /* Someone already cleaned up the buffer? */ + if (!buffer_jbd(bh) + || jh->b_transaction != commit_transaction + || jh->b_jlist != BJ_SyncData) { + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + BUFFER_TRACE(bh, "already cleaned up"); + put_bh(bh); + continue; + } + if (locked && test_clear_buffer_dirty(bh)) { + BUFFER_TRACE(bh, "needs writeout, adding to array"); + wbuf[bufs++] = bh; + __jbd2_journal_file_buffer(jh, commit_transaction, + BJ_Locked); + jbd_unlock_bh_state(bh); + if (bufs == journal->j_wbufsize) { + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); + bufs = 0; + goto write_out_data; + } + } + else { + BUFFER_TRACE(bh, "writeout complete: unfile"); + __jbd2_journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + jbd2_journal_remove_journal_head(bh); + /* Once for our safety reference, once for + * jbd2_journal_remove_journal_head() */ + put_bh(bh); + put_bh(bh); + } + + if (lock_need_resched(&journal->j_list_lock)) { + spin_unlock(&journal->j_list_lock); + goto write_out_data; + } + } + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); +} + +static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, + unsigned long long block) +{ + tag->t_blocknr = cpu_to_be32(block & (u32)~0); + if (tag_bytes > JBD_TAG_SIZE32) + tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); +} + +/* + * jbd2_journal_commit_transaction + * + * The primary function for committing a transaction to the log. This + * function is called by the journal thread to begin a complete commit. + */ +void jbd2_journal_commit_transaction(journal_t *journal) +{ + transaction_t *commit_transaction; + struct journal_head *jh, *new_jh, *descriptor; + struct buffer_head **wbuf = journal->j_wbuf; + int bufs; + int flags; + int err; + unsigned long long blocknr; + char *tagp = NULL; + journal_header_t *header; + journal_block_tag_t *tag = NULL; + int space_left = 0; + int first_tag = 0; + int tag_flag; + int i; + int tag_bytes = journal_tag_bytes(journal); + + /* + * First job: lock down the current transaction and wait for + * all outstanding updates to complete. + */ + +#ifdef COMMIT_STATS + spin_lock(&journal->j_list_lock); + summarise_journal_usage(journal); + spin_unlock(&journal->j_list_lock); +#endif + + /* Do we need to erase the effects of a prior jbd2_journal_flush? */ + if (journal->j_flags & JBD2_FLUSHED) { + jbd_debug(3, "super block updated\n"); + jbd2_journal_update_superblock(journal, 1); + } else { + jbd_debug(3, "superblock not updated\n"); + } + + J_ASSERT(journal->j_running_transaction != NULL); + J_ASSERT(journal->j_committing_transaction == NULL); + + commit_transaction = journal->j_running_transaction; + J_ASSERT(commit_transaction->t_state == T_RUNNING); + + jbd_debug(1, "JBD: starting commit of transaction %d\n", + commit_transaction->t_tid); + + spin_lock(&journal->j_state_lock); + commit_transaction->t_state = T_LOCKED; + + spin_lock(&commit_transaction->t_handle_lock); + while (commit_transaction->t_updates) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_wait_updates, &wait, + TASK_UNINTERRUPTIBLE); + if (commit_transaction->t_updates) { + spin_unlock(&commit_transaction->t_handle_lock); + spin_unlock(&journal->j_state_lock); + schedule(); + spin_lock(&journal->j_state_lock); + spin_lock(&commit_transaction->t_handle_lock); + } + finish_wait(&journal->j_wait_updates, &wait); + } + spin_unlock(&commit_transaction->t_handle_lock); + + J_ASSERT (commit_transaction->t_outstanding_credits <= + journal->j_max_transaction_buffers); + + /* + * First thing we are allowed to do is to discard any remaining + * BJ_Reserved buffers. Note, it is _not_ permissible to assume + * that there are no such buffers: if a large filesystem + * operation like a truncate needs to split itself over multiple + * transactions, then it may try to do a jbd2_journal_restart() while + * there are still BJ_Reserved buffers outstanding. These must + * be released cleanly from the current transaction. + * + * In this case, the filesystem must still reserve write access + * again before modifying the buffer in the new transaction, but + * we do not require it to remember exactly which old buffers it + * has reserved. This is consistent with the existing behaviour + * that multiple jbd2_journal_get_write_access() calls to the same + * buffer are perfectly permissable. + */ + while (commit_transaction->t_reserved_list) { + jh = commit_transaction->t_reserved_list; + JBUFFER_TRACE(jh, "reserved, unused: refile"); + /* + * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may + * leave undo-committed data. + */ + if (jh->b_committed_data) { + struct buffer_head *bh = jh2bh(jh); + + jbd_lock_bh_state(bh); + jbd2_slab_free(jh->b_committed_data, bh->b_size); + jh->b_committed_data = NULL; + jbd_unlock_bh_state(bh); + } + jbd2_journal_refile_buffer(journal, jh); + } + + /* + * Now try to drop any written-back buffers from the journal's + * checkpoint lists. We do this *before* commit because it potentially + * frees some memory + */ + spin_lock(&journal->j_list_lock); + __jbd2_journal_clean_checkpoint_list(journal); + spin_unlock(&journal->j_list_lock); + + jbd_debug (3, "JBD: commit phase 1\n"); + + /* + * Switch to a new revoke table. + */ + jbd2_journal_switch_revoke_table(journal); + + commit_transaction->t_state = T_FLUSH; + journal->j_committing_transaction = commit_transaction; + journal->j_running_transaction = NULL; + commit_transaction->t_log_start = journal->j_head; + wake_up(&journal->j_wait_transaction_locked); + spin_unlock(&journal->j_state_lock); + + jbd_debug (3, "JBD: commit phase 2\n"); + + /* + * First, drop modified flag: all accesses to the buffers + * will be tracked for a new trasaction only -bzzz + */ + spin_lock(&journal->j_list_lock); + if (commit_transaction->t_buffers) { + new_jh = jh = commit_transaction->t_buffers->b_tnext; + do { + J_ASSERT_JH(new_jh, new_jh->b_modified == 1 || + new_jh->b_modified == 0); + new_jh->b_modified = 0; + new_jh = new_jh->b_tnext; + } while (new_jh != jh); + } + spin_unlock(&journal->j_list_lock); + + /* + * Now start flushing things to disk, in the order they appear + * on the transaction lists. Data blocks go first. + */ + err = 0; + journal_submit_data_buffers(journal, commit_transaction); + + /* + * Wait for all previously submitted IO to complete. + */ + spin_lock(&journal->j_list_lock); + while (commit_transaction->t_locked_list) { + struct buffer_head *bh; + + jh = commit_transaction->t_locked_list->b_tprev; + bh = jh2bh(jh); + get_bh(bh); + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + spin_lock(&journal->j_list_lock); + } + if (!inverted_lock(journal, bh)) { + put_bh(bh); + spin_lock(&journal->j_list_lock); + continue; + } + if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { + __jbd2_journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + jbd2_journal_remove_journal_head(bh); + put_bh(bh); + } else { + jbd_unlock_bh_state(bh); + } + put_bh(bh); + cond_resched_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); + + if (err) + __jbd2_journal_abort_hard(journal); + + jbd2_journal_write_revoke_records(journal, commit_transaction); + + jbd_debug(3, "JBD: commit phase 2\n"); + + /* + * If we found any dirty or locked buffers, then we should have + * looped back up to the write_out_data label. If there weren't + * any then journal_clean_data_list should have wiped the list + * clean by now, so check that it is in fact empty. + */ + J_ASSERT (commit_transaction->t_sync_datalist == NULL); + + jbd_debug (3, "JBD: commit phase 3\n"); + + /* + * Way to go: we have now written out all of the data for a + * transaction! Now comes the tricky part: we need to write out + * metadata. Loop over the transaction's entire buffer list: + */ + commit_transaction->t_state = T_COMMIT; + + descriptor = NULL; + bufs = 0; + while (commit_transaction->t_buffers) { + + /* Find the next buffer to be journaled... */ + + jh = commit_transaction->t_buffers; + + /* If we're in abort mode, we just un-journal the buffer and + release it for background writing. */ + + if (is_journal_aborted(journal)) { + JBUFFER_TRACE(jh, "journal is aborting: refile"); + jbd2_journal_refile_buffer(journal, jh); + /* If that was the last one, we need to clean up + * any descriptor buffers which may have been + * already allocated, even if we are now + * aborting. */ + if (!commit_transaction->t_buffers) + goto start_journal_io; + continue; + } + + /* Make sure we have a descriptor block in which to + record the metadata buffer. */ + + if (!descriptor) { + struct buffer_head *bh; + + J_ASSERT (bufs == 0); + + jbd_debug(4, "JBD: get descriptor\n"); + + descriptor = jbd2_journal_get_descriptor_buffer(journal); + if (!descriptor) { + __jbd2_journal_abort_hard(journal); + continue; + } + + bh = jh2bh(descriptor); + jbd_debug(4, "JBD: got buffer %llu (%p)\n", + (unsigned long long)bh->b_blocknr, bh->b_data); + header = (journal_header_t *)&bh->b_data[0]; + header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); + header->h_sequence = cpu_to_be32(commit_transaction->t_tid); + + tagp = &bh->b_data[sizeof(journal_header_t)]; + space_left = bh->b_size - sizeof(journal_header_t); + first_tag = 1; + set_buffer_jwrite(bh); + set_buffer_dirty(bh); + wbuf[bufs++] = bh; + + /* Record it so that we can wait for IO + completion later */ + BUFFER_TRACE(bh, "ph3: file as descriptor"); + jbd2_journal_file_buffer(descriptor, commit_transaction, + BJ_LogCtl); + } + + /* Where is the buffer to be written? */ + + err = jbd2_journal_next_log_block(journal, &blocknr); + /* If the block mapping failed, just abandon the buffer + and repeat this loop: we'll fall into the + refile-on-abort condition above. */ + if (err) { + __jbd2_journal_abort_hard(journal); + continue; + } + + /* + * start_this_handle() uses t_outstanding_credits to determine + * the free space in the log, but this counter is changed + * by jbd2_journal_next_log_block() also. + */ + commit_transaction->t_outstanding_credits--; + + /* Bump b_count to prevent truncate from stumbling over + the shadowed buffer! @@@ This can go if we ever get + rid of the BJ_IO/BJ_Shadow pairing of buffers. */ + atomic_inc(&jh2bh(jh)->b_count); + + /* Make a temporary IO buffer with which to write it out + (this will requeue both the metadata buffer and the + temporary IO buffer). new_bh goes on BJ_IO*/ + + set_bit(BH_JWrite, &jh2bh(jh)->b_state); + /* + * akpm: jbd2_journal_write_metadata_buffer() sets + * new_bh->b_transaction to commit_transaction. + * We need to clean this up before we release new_bh + * (which is of type BJ_IO) + */ + JBUFFER_TRACE(jh, "ph3: write metadata"); + flags = jbd2_journal_write_metadata_buffer(commit_transaction, + jh, &new_jh, blocknr); + set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); + wbuf[bufs++] = jh2bh(new_jh); + + /* Record the new block's tag in the current descriptor + buffer */ + + tag_flag = 0; + if (flags & 1) + tag_flag |= JBD2_FLAG_ESCAPE; + if (!first_tag) + tag_flag |= JBD2_FLAG_SAME_UUID; + + tag = (journal_block_tag_t *) tagp; + write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); + tag->t_flags = cpu_to_be32(tag_flag); + tagp += tag_bytes; + space_left -= tag_bytes; + + if (first_tag) { + memcpy (tagp, journal->j_uuid, 16); + tagp += 16; + space_left -= 16; + first_tag = 0; + } + + /* If there's no more to do, or if the descriptor is full, + let the IO rip! */ + + if (bufs == journal->j_wbufsize || + commit_transaction->t_buffers == NULL || + space_left < tag_bytes + 16) { + + jbd_debug(4, "JBD: Submit %d IOs\n", bufs); + + /* Write an end-of-descriptor marker before + submitting the IOs. "tag" still points to + the last tag we set up. */ + + tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); + +start_journal_io: + for (i = 0; i < bufs; i++) { + struct buffer_head *bh = wbuf[i]; + lock_buffer(bh); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + bh->b_end_io = journal_end_buffer_io_sync; + submit_bh(WRITE, bh); + } + cond_resched(); + + /* Force a new descriptor to be generated next + time round the loop. */ + descriptor = NULL; + bufs = 0; + } + } + + /* Lo and behold: we have just managed to send a transaction to + the log. Before we can commit it, wait for the IO so far to + complete. Control buffers being written are on the + transaction's t_log_list queue, and metadata buffers are on + the t_iobuf_list queue. + + Wait for the buffers in reverse order. That way we are + less likely to be woken up until all IOs have completed, and + so we incur less scheduling load. + */ + + jbd_debug(3, "JBD: commit phase 4\n"); + + /* + * akpm: these are BJ_IO, and j_list_lock is not needed. + * See __journal_try_to_free_buffer. + */ +wait_for_iobuf: + while (commit_transaction->t_iobuf_list != NULL) { + struct buffer_head *bh; + + jh = commit_transaction->t_iobuf_list->b_tprev; + bh = jh2bh(jh); + if (buffer_locked(bh)) { + wait_on_buffer(bh); + goto wait_for_iobuf; + } + if (cond_resched()) + goto wait_for_iobuf; + + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + + clear_buffer_jwrite(bh); + + JBUFFER_TRACE(jh, "ph4: unfile after journal write"); + jbd2_journal_unfile_buffer(journal, jh); + + /* + * ->t_iobuf_list should contain only dummy buffer_heads + * which were created by jbd2_journal_write_metadata_buffer(). + */ + BUFFER_TRACE(bh, "dumping temporary bh"); + jbd2_journal_put_journal_head(jh); + __brelse(bh); + J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); + free_buffer_head(bh); + + /* We also have to unlock and free the corresponding + shadowed buffer */ + jh = commit_transaction->t_shadow_list->b_tprev; + bh = jh2bh(jh); + clear_bit(BH_JWrite, &bh->b_state); + J_ASSERT_BH(bh, buffer_jbddirty(bh)); + + /* The metadata is now released for reuse, but we need + to remember it against this transaction so that when + we finally commit, we can do any checkpointing + required. */ + JBUFFER_TRACE(jh, "file as BJ_Forget"); + jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); + /* Wake up any transactions which were waiting for this + IO to complete */ + wake_up_bit(&bh->b_state, BH_Unshadow); + JBUFFER_TRACE(jh, "brelse shadowed buffer"); + __brelse(bh); + } + + J_ASSERT (commit_transaction->t_shadow_list == NULL); + + jbd_debug(3, "JBD: commit phase 5\n"); + + /* Here we wait for the revoke record and descriptor record buffers */ + wait_for_ctlbuf: + while (commit_transaction->t_log_list != NULL) { + struct buffer_head *bh; + + jh = commit_transaction->t_log_list->b_tprev; + bh = jh2bh(jh); + if (buffer_locked(bh)) { + wait_on_buffer(bh); + goto wait_for_ctlbuf; + } + if (cond_resched()) + goto wait_for_ctlbuf; + + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + + BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); + clear_buffer_jwrite(bh); + jbd2_journal_unfile_buffer(journal, jh); + jbd2_journal_put_journal_head(jh); + __brelse(bh); /* One for getblk */ + /* AKPM: bforget here */ + } + + jbd_debug(3, "JBD: commit phase 6\n"); + + if (journal_write_commit_record(journal, commit_transaction)) + err = -EIO; + + if (err) + __jbd2_journal_abort_hard(journal); + + /* End of a transaction! Finally, we can do checkpoint + processing: any buffers committed as a result of this + transaction can be removed from any checkpoint list it was on + before. */ + + jbd_debug(3, "JBD: commit phase 7\n"); + + J_ASSERT(commit_transaction->t_sync_datalist == NULL); + J_ASSERT(commit_transaction->t_buffers == NULL); + J_ASSERT(commit_transaction->t_checkpoint_list == NULL); + J_ASSERT(commit_transaction->t_iobuf_list == NULL); + J_ASSERT(commit_transaction->t_shadow_list == NULL); + J_ASSERT(commit_transaction->t_log_list == NULL); + +restart_loop: + /* + * As there are other places (journal_unmap_buffer()) adding buffers + * to this list we have to be careful and hold the j_list_lock. + */ + spin_lock(&journal->j_list_lock); + while (commit_transaction->t_forget) { + transaction_t *cp_transaction; + struct buffer_head *bh; + + jh = commit_transaction->t_forget; + spin_unlock(&journal->j_list_lock); + bh = jh2bh(jh); + jbd_lock_bh_state(bh); + J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || + jh->b_transaction == journal->j_running_transaction); + + /* + * If there is undo-protected committed data against + * this buffer, then we can remove it now. If it is a + * buffer needing such protection, the old frozen_data + * field now points to a committed version of the + * buffer, so rotate that field to the new committed + * data. + * + * Otherwise, we can just throw away the frozen data now. + */ + if (jh->b_committed_data) { + jbd2_slab_free(jh->b_committed_data, bh->b_size); + jh->b_committed_data = NULL; + if (jh->b_frozen_data) { + jh->b_committed_data = jh->b_frozen_data; + jh->b_frozen_data = NULL; + } + } else if (jh->b_frozen_data) { + jbd2_slab_free(jh->b_frozen_data, bh->b_size); + jh->b_frozen_data = NULL; + } + + spin_lock(&journal->j_list_lock); + cp_transaction = jh->b_cp_transaction; + if (cp_transaction) { + JBUFFER_TRACE(jh, "remove from old cp transaction"); + __jbd2_journal_remove_checkpoint(jh); + } + + /* Only re-checkpoint the buffer_head if it is marked + * dirty. If the buffer was added to the BJ_Forget list + * by jbd2_journal_forget, it may no longer be dirty and + * there's no point in keeping a checkpoint record for + * it. */ + + /* A buffer which has been freed while still being + * journaled by a previous transaction may end up still + * being dirty here, but we want to avoid writing back + * that buffer in the future now that the last use has + * been committed. That's not only a performance gain, + * it also stops aliasing problems if the buffer is left + * behind for writeback and gets reallocated for another + * use in a different page. */ + if (buffer_freed(bh)) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + } + + if (buffer_jbddirty(bh)) { + JBUFFER_TRACE(jh, "add to new checkpointing trans"); + __jbd2_journal_insert_checkpoint(jh, commit_transaction); + JBUFFER_TRACE(jh, "refile for checkpoint writeback"); + __jbd2_journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + } else { + J_ASSERT_BH(bh, !buffer_dirty(bh)); + /* The buffer on BJ_Forget list and not jbddirty means + * it has been freed by this transaction and hence it + * could not have been reallocated until this + * transaction has committed. *BUT* it could be + * reallocated once we have written all the data to + * disk and before we process the buffer on BJ_Forget + * list. */ + JBUFFER_TRACE(jh, "refile or unfile freed buffer"); + __jbd2_journal_refile_buffer(jh); + if (!jh->b_transaction) { + jbd_unlock_bh_state(bh); + /* needs a brelse */ + jbd2_journal_remove_journal_head(bh); + release_buffer_page(bh); + } else + jbd_unlock_bh_state(bh); + } + cond_resched_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); + /* + * This is a bit sleazy. We borrow j_list_lock to protect + * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. + * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but + * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint + */ + spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + /* + * Now recheck if some buffers did not get attached to the transaction + * while the lock was dropped... + */ + if (commit_transaction->t_forget) { + spin_unlock(&journal->j_list_lock); + spin_unlock(&journal->j_state_lock); + goto restart_loop; + } + + /* Done with this transaction! */ + + jbd_debug(3, "JBD: commit phase 8\n"); + + J_ASSERT(commit_transaction->t_state == T_COMMIT); + + commit_transaction->t_state = T_FINISHED; + J_ASSERT(commit_transaction == journal->j_committing_transaction); + journal->j_commit_sequence = commit_transaction->t_tid; + journal->j_committing_transaction = NULL; + spin_unlock(&journal->j_state_lock); + + if (commit_transaction->t_checkpoint_list == NULL) { + __jbd2_journal_drop_transaction(journal, commit_transaction); + } else { + if (journal->j_checkpoint_transactions == NULL) { + journal->j_checkpoint_transactions = commit_transaction; + commit_transaction->t_cpnext = commit_transaction; + commit_transaction->t_cpprev = commit_transaction; + } else { + commit_transaction->t_cpnext = + journal->j_checkpoint_transactions; + commit_transaction->t_cpprev = + commit_transaction->t_cpnext->t_cpprev; + commit_transaction->t_cpnext->t_cpprev = + commit_transaction; + commit_transaction->t_cpprev->t_cpnext = + commit_transaction; + } + } + spin_unlock(&journal->j_list_lock); + + jbd_debug(1, "JBD: commit %d complete, head %d\n", + journal->j_commit_sequence, journal->j_tail_sequence); + + wake_up(&journal->j_wait_done_commit); +} diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c new file mode 100644 index 000000000000..10db92ced014 --- /dev/null +++ b/fs/jbd2/journal.c @@ -0,0 +1,2083 @@ +/* + * linux/fs/jbd2/journal.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 + * + * Copyright 1998 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Generic filesystem journal-writing code; part of the ext2fs + * journaling system. + * + * This file manages journals: areas of disk reserved for logging + * transactional updates. This includes the kernel journaling thread + * which is responsible for scheduling updates to the log. + * + * We do not actually manage the physical storage of the journal in this + * file: that is left to a per-journal policy function, which allows us + * to store the journal within a filesystem-specified area for ext2 + * journaling (ext2 can use a reserved inode for storing the log). + */ + +#include <linux/module.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/suspend.h> +#include <linux/pagemap.h> +#include <linux/kthread.h> +#include <linux/poison.h> +#include <linux/proc_fs.h> + +#include <asm/uaccess.h> +#include <asm/page.h> + +EXPORT_SYMBOL(jbd2_journal_start); +EXPORT_SYMBOL(jbd2_journal_restart); +EXPORT_SYMBOL(jbd2_journal_extend); +EXPORT_SYMBOL(jbd2_journal_stop); +EXPORT_SYMBOL(jbd2_journal_lock_updates); +EXPORT_SYMBOL(jbd2_journal_unlock_updates); +EXPORT_SYMBOL(jbd2_journal_get_write_access); +EXPORT_SYMBOL(jbd2_journal_get_create_access); +EXPORT_SYMBOL(jbd2_journal_get_undo_access); +EXPORT_SYMBOL(jbd2_journal_dirty_data); +EXPORT_SYMBOL(jbd2_journal_dirty_metadata); +EXPORT_SYMBOL(jbd2_journal_release_buffer); +EXPORT_SYMBOL(jbd2_journal_forget); +#if 0 +EXPORT_SYMBOL(journal_sync_buffer); +#endif +EXPORT_SYMBOL(jbd2_journal_flush); +EXPORT_SYMBOL(jbd2_journal_revoke); + +EXPORT_SYMBOL(jbd2_journal_init_dev); +EXPORT_SYMBOL(jbd2_journal_init_inode); +EXPORT_SYMBOL(jbd2_journal_update_format); +EXPORT_SYMBOL(jbd2_journal_check_used_features); +EXPORT_SYMBOL(jbd2_journal_check_available_features); +EXPORT_SYMBOL(jbd2_journal_set_features); +EXPORT_SYMBOL(jbd2_journal_create); +EXPORT_SYMBOL(jbd2_journal_load); +EXPORT_SYMBOL(jbd2_journal_destroy); +EXPORT_SYMBOL(jbd2_journal_update_superblock); +EXPORT_SYMBOL(jbd2_journal_abort); +EXPORT_SYMBOL(jbd2_journal_errno); +EXPORT_SYMBOL(jbd2_journal_ack_err); +EXPORT_SYMBOL(jbd2_journal_clear_err); +EXPORT_SYMBOL(jbd2_log_wait_commit); +EXPORT_SYMBOL(jbd2_journal_start_commit); +EXPORT_SYMBOL(jbd2_journal_force_commit_nested); +EXPORT_SYMBOL(jbd2_journal_wipe); +EXPORT_SYMBOL(jbd2_journal_blocks_per_page); +EXPORT_SYMBOL(jbd2_journal_invalidatepage); +EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); +EXPORT_SYMBOL(jbd2_journal_force_commit); + +static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); +static void __journal_abort_soft (journal_t *journal, int errno); +static int jbd2_journal_create_jbd_slab(size_t slab_size); + +/* + * Helper function used to manage commit timeouts + */ + +static void commit_timeout(unsigned long __data) +{ + struct task_struct * p = (struct task_struct *) __data; + + wake_up_process(p); +} + +/* + * kjournald2: The main thread function used to manage a logging device + * journal. + * + * This kernel thread is responsible for two things: + * + * 1) COMMIT: Every so often we need to commit the current state of the + * filesystem to disk. The journal thread is responsible for writing + * all of the metadata buffers to disk. + * + * 2) CHECKPOINT: We cannot reuse a used section of the log file until all + * of the data in that part of the log has been rewritten elsewhere on + * the disk. Flushing these old buffers to reclaim space in the log is + * known as checkpointing, and this thread is responsible for that job. + */ + +static int kjournald2(void *arg) +{ + journal_t *journal = arg; + transaction_t *transaction; + + /* + * Set up an interval timer which can be used to trigger a commit wakeup + * after the commit interval expires + */ + setup_timer(&journal->j_commit_timer, commit_timeout, + (unsigned long)current); + + /* Record that the journal thread is running */ + journal->j_task = current; + wake_up(&journal->j_wait_done_commit); + + printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", + journal->j_commit_interval / HZ); + + /* + * And now, wait forever for commit wakeup events. + */ + spin_lock(&journal->j_state_lock); + +loop: + if (journal->j_flags & JBD2_UNMOUNT) + goto end_loop; + + jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", + journal->j_commit_sequence, journal->j_commit_request); + + if (journal->j_commit_sequence != journal->j_commit_request) { + jbd_debug(1, "OK, requests differ\n"); + spin_unlock(&journal->j_state_lock); + del_timer_sync(&journal->j_commit_timer); + jbd2_journal_commit_transaction(journal); + spin_lock(&journal->j_state_lock); + goto loop; + } + + wake_up(&journal->j_wait_done_commit); + if (freezing(current)) { + /* + * The simpler the better. Flushing journal isn't a + * good idea, because that depends on threads that may + * be already stopped. + */ + jbd_debug(1, "Now suspending kjournald2\n"); + spin_unlock(&journal->j_state_lock); + refrigerator(); + spin_lock(&journal->j_state_lock); + } else { + /* + * We assume on resume that commits are already there, + * so we don't sleep + */ + DEFINE_WAIT(wait); + int should_sleep = 1; + + prepare_to_wait(&journal->j_wait_commit, &wait, + TASK_INTERRUPTIBLE); + if (journal->j_commit_sequence != journal->j_commit_request) + should_sleep = 0; + transaction = journal->j_running_transaction; + if (transaction && time_after_eq(jiffies, + transaction->t_expires)) + should_sleep = 0; + if (journal->j_flags & JBD2_UNMOUNT) + should_sleep = 0; + if (should_sleep) { + spin_unlock(&journal->j_state_lock); + schedule(); + spin_lock(&journal->j_state_lock); + } + finish_wait(&journal->j_wait_commit, &wait); + } + + jbd_debug(1, "kjournald2 wakes\n"); + + /* + * Were we woken up by a commit wakeup event? + */ + transaction = journal->j_running_transaction; + if (transaction && time_after_eq(jiffies, transaction->t_expires)) { + journal->j_commit_request = transaction->t_tid; + jbd_debug(1, "woke because of timeout\n"); + } + goto loop; + +end_loop: + spin_unlock(&journal->j_state_lock); + del_timer_sync(&journal->j_commit_timer); + journal->j_task = NULL; + wake_up(&journal->j_wait_done_commit); + jbd_debug(1, "Journal thread exiting.\n"); + return 0; +} + +static void jbd2_journal_start_thread(journal_t *journal) +{ + kthread_run(kjournald2, journal, "kjournald2"); + wait_event(journal->j_wait_done_commit, journal->j_task != 0); +} + +static void journal_kill_thread(journal_t *journal) +{ + spin_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_UNMOUNT; + + while (journal->j_task) { + wake_up(&journal->j_wait_commit); + spin_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_done_commit, journal->j_task == 0); + spin_lock(&journal->j_state_lock); + } + spin_unlock(&journal->j_state_lock); +} + +/* + * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. + * + * Writes a metadata buffer to a given disk block. The actual IO is not + * performed but a new buffer_head is constructed which labels the data + * to be written with the correct destination disk block. + * + * Any magic-number escaping which needs to be done will cause a + * copy-out here. If the buffer happens to start with the + * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the + * magic number is only written to the log for descripter blocks. In + * this case, we copy the data and replace the first word with 0, and we + * return a result code which indicates that this buffer needs to be + * marked as an escaped buffer in the corresponding log descriptor + * block. The missing word can then be restored when the block is read + * during recovery. + * + * If the source buffer has already been modified by a new transaction + * since we took the last commit snapshot, we use the frozen copy of + * that data for IO. If we end up using the existing buffer_head's data + * for the write, then we *have* to lock the buffer to prevent anyone + * else from using and possibly modifying it while the IO is in + * progress. + * + * The function returns a pointer to the buffer_heads to be used for IO. + * + * We assume that the journal has already been locked in this function. + * + * Return value: + * <0: Error + * >=0: Finished OK + * + * On success: + * Bit 0 set == escape performed on the data + * Bit 1 set == buffer copy-out performed (kfree the data after IO) + */ + +int jbd2_journal_write_metadata_buffer(transaction_t *transaction, + struct journal_head *jh_in, + struct journal_head **jh_out, + unsigned long long blocknr) +{ + int need_copy_out = 0; + int done_copy_out = 0; + int do_escape = 0; + char *mapped_data; + struct buffer_head *new_bh; + struct journal_head *new_jh; + struct page *new_page; + unsigned int new_offset; + struct buffer_head *bh_in = jh2bh(jh_in); + + /* + * The buffer really shouldn't be locked: only the current committing + * transaction is allowed to write it, so nobody else is allowed + * to do any IO. + * + * akpm: except if we're journalling data, and write() output is + * also part of a shared mapping, and another thread has + * decided to launch a writepage() against this buffer. + */ + J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); + + new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); + + /* + * If a new transaction has already done a buffer copy-out, then + * we use that version of the data for the commit. + */ + jbd_lock_bh_state(bh_in); +repeat: + if (jh_in->b_frozen_data) { + done_copy_out = 1; + new_page = virt_to_page(jh_in->b_frozen_data); + new_offset = offset_in_page(jh_in->b_frozen_data); + } else { + new_page = jh2bh(jh_in)->b_page; + new_offset = offset_in_page(jh2bh(jh_in)->b_data); + } + + mapped_data = kmap_atomic(new_page, KM_USER0); + /* + * Check for escaping + */ + if (*((__be32 *)(mapped_data + new_offset)) == + cpu_to_be32(JBD2_MAGIC_NUMBER)) { + need_copy_out = 1; + do_escape = 1; + } + kunmap_atomic(mapped_data, KM_USER0); + + /* + * Do we need to do a data copy? + */ + if (need_copy_out && !done_copy_out) { + char *tmp; + + jbd_unlock_bh_state(bh_in); + tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS); + jbd_lock_bh_state(bh_in); + if (jh_in->b_frozen_data) { + jbd2_slab_free(tmp, bh_in->b_size); + goto repeat; + } + + jh_in->b_frozen_data = tmp; + mapped_data = kmap_atomic(new_page, KM_USER0); + memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); + kunmap_atomic(mapped_data, KM_USER0); + + new_page = virt_to_page(tmp); + new_offset = offset_in_page(tmp); + done_copy_out = 1; + } + + /* + * Did we need to do an escaping? Now we've done all the + * copying, we can finally do so. + */ + if (do_escape) { + mapped_data = kmap_atomic(new_page, KM_USER0); + *((unsigned int *)(mapped_data + new_offset)) = 0; + kunmap_atomic(mapped_data, KM_USER0); + } + + /* keep subsequent assertions sane */ + new_bh->b_state = 0; + init_buffer(new_bh, NULL, NULL); + atomic_set(&new_bh->b_count, 1); + jbd_unlock_bh_state(bh_in); + + new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ + + set_bh_page(new_bh, new_page, new_offset); + new_jh->b_transaction = NULL; + new_bh->b_size = jh2bh(jh_in)->b_size; + new_bh->b_bdev = transaction->t_journal->j_dev; + new_bh->b_blocknr = blocknr; + set_buffer_mapped(new_bh); + set_buffer_dirty(new_bh); + + *jh_out = new_jh; + + /* + * The to-be-written buffer needs to get moved to the io queue, + * and the original buffer whose contents we are shadowing or + * copying is moved to the transaction's shadow queue. + */ + JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); + jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); + JBUFFER_TRACE(new_jh, "file as BJ_IO"); + jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); + + return do_escape | (done_copy_out << 1); +} + +/* + * Allocation code for the journal file. Manage the space left in the + * journal, so that we can begin checkpointing when appropriate. + */ + +/* + * __jbd2_log_space_left: Return the number of free blocks left in the journal. + * + * Called with the journal already locked. + * + * Called under j_state_lock + */ + +int __jbd2_log_space_left(journal_t *journal) +{ + int left = journal->j_free; + + assert_spin_locked(&journal->j_state_lock); + + /* + * Be pessimistic here about the number of those free blocks which + * might be required for log descriptor control blocks. + */ + +#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ + + left -= MIN_LOG_RESERVED_BLOCKS; + + if (left <= 0) + return 0; + left -= (left >> 3); + return left; +} + +/* + * Called under j_state_lock. Returns true if a transaction was started. + */ +int __jbd2_log_start_commit(journal_t *journal, tid_t target) +{ + /* + * Are we already doing a recent enough commit? + */ + if (!tid_geq(journal->j_commit_request, target)) { + /* + * We want a new commit: OK, mark the request and wakup the + * commit thread. We do _not_ do the commit ourselves. + */ + + journal->j_commit_request = target; + jbd_debug(1, "JBD: requesting commit %d/%d\n", + journal->j_commit_request, + journal->j_commit_sequence); + wake_up(&journal->j_wait_commit); + return 1; + } + return 0; +} + +int jbd2_log_start_commit(journal_t *journal, tid_t tid) +{ + int ret; + + spin_lock(&journal->j_state_lock); + ret = __jbd2_log_start_commit(journal, tid); + spin_unlock(&journal->j_state_lock); + return ret; +} + +/* + * Force and wait upon a commit if the calling process is not within + * transaction. This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. + * + * We can only force the running transaction if we don't have an active handle; + * otherwise, we will deadlock. + * + * Returns true if a transaction was started. + */ +int jbd2_journal_force_commit_nested(journal_t *journal) +{ + transaction_t *transaction = NULL; + tid_t tid; + + spin_lock(&journal->j_state_lock); + if (journal->j_running_transaction && !current->journal_info) { + transaction = journal->j_running_transaction; + __jbd2_log_start_commit(journal, transaction->t_tid); + } else if (journal->j_committing_transaction) + transaction = journal->j_committing_transaction; + + if (!transaction) { + spin_unlock(&journal->j_state_lock); + return 0; /* Nothing to retry */ + } + + tid = transaction->t_tid; + spin_unlock(&journal->j_state_lock); + jbd2_log_wait_commit(journal, tid); + return 1; +} + +/* + * Start a commit of the current running transaction (if any). Returns true + * if a transaction was started, and fills its tid in at *ptid + */ +int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) +{ + int ret = 0; + + spin_lock(&journal->j_state_lock); + if (journal->j_running_transaction) { + tid_t tid = journal->j_running_transaction->t_tid; + + ret = __jbd2_log_start_commit(journal, tid); + if (ret && ptid) + *ptid = tid; + } else if (journal->j_committing_transaction && ptid) { + /* + * If ext3_write_super() recently started a commit, then we + * have to wait for completion of that transaction + */ + *ptid = journal->j_committing_transaction->t_tid; + ret = 1; + } + spin_unlock(&journal->j_state_lock); + return ret; +} + +/* + * Wait for a specified commit to complete. + * The caller may not hold the journal lock. + */ +int jbd2_log_wait_commit(journal_t *journal, tid_t tid) +{ + int err = 0; + +#ifdef CONFIG_JBD_DEBUG + spin_lock(&journal->j_state_lock); + if (!tid_geq(journal->j_commit_request, tid)) { + printk(KERN_EMERG + "%s: error: j_commit_request=%d, tid=%d\n", + __FUNCTION__, journal->j_commit_request, tid); + } + spin_unlock(&journal->j_state_lock); +#endif + spin_lock(&journal->j_state_lock); + while (tid_gt(tid, journal->j_commit_sequence)) { + jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", + tid, journal->j_commit_sequence); + wake_up(&journal->j_wait_commit); + spin_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_done_commit, + !tid_gt(tid, journal->j_commit_sequence)); + spin_lock(&journal->j_state_lock); + } + spin_unlock(&journal->j_state_lock); + + if (unlikely(is_journal_aborted(journal))) { + printk(KERN_EMERG "journal commit I/O error\n"); + err = -EIO; + } + return err; +} + +/* + * Log buffer allocation routines: + */ + +int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) +{ + unsigned long blocknr; + + spin_lock(&journal->j_state_lock); + J_ASSERT(journal->j_free > 1); + + blocknr = journal->j_head; + journal->j_head++; + journal->j_free--; + if (journal->j_head == journal->j_last) + journal->j_head = journal->j_first; + spin_unlock(&journal->j_state_lock); + return jbd2_journal_bmap(journal, blocknr, retp); +} + +/* + * Conversion of logical to physical block numbers for the journal + * + * On external journals the journal blocks are identity-mapped, so + * this is a no-op. If needed, we can use j_blk_offset - everything is + * ready. + */ +int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, + unsigned long long *retp) +{ + int err = 0; + unsigned long long ret; + + if (journal->j_inode) { + ret = bmap(journal->j_inode, blocknr); + if (ret) + *retp = ret; + else { + char b[BDEVNAME_SIZE]; + + printk(KERN_ALERT "%s: journal block not found " + "at offset %lu on %s\n", + __FUNCTION__, + blocknr, + bdevname(journal->j_dev, b)); + err = -EIO; + __journal_abort_soft(journal, err); + } + } else { + *retp = blocknr; /* +journal->j_blk_offset */ + } + return err; +} + +/* + * We play buffer_head aliasing tricks to write data/metadata blocks to + * the journal without copying their contents, but for journal + * descriptor blocks we do need to generate bona fide buffers. + * + * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying + * the buffer's contents they really should run flush_dcache_page(bh->b_page). + * But we don't bother doing that, so there will be coherency problems with + * mmaps of blockdevs which hold live JBD-controlled filesystems. + */ +struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) +{ + struct buffer_head *bh; + unsigned long long blocknr; + int err; + + err = jbd2_journal_next_log_block(journal, &blocknr); + + if (err) + return NULL; + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + lock_buffer(bh); + memset(bh->b_data, 0, journal->j_blocksize); + set_buffer_uptodate(bh); + unlock_buffer(bh); + BUFFER_TRACE(bh, "return this buffer"); + return jbd2_journal_add_journal_head(bh); +} + +/* + * Management for journal control blocks: functions to create and + * destroy journal_t structures, and to initialise and read existing + * journal blocks from disk. */ + +/* First: create and setup a journal_t object in memory. We initialise + * very few fields yet: that has to wait until we have created the + * journal structures from from scratch, or loaded them from disk. */ + +static journal_t * journal_init_common (void) +{ + journal_t *journal; + int err; + + journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL); + if (!journal) + goto fail; + memset(journal, 0, sizeof(*journal)); + + init_waitqueue_head(&journal->j_wait_transaction_locked); + init_waitqueue_head(&journal->j_wait_logspace); + init_waitqueue_head(&journal->j_wait_done_commit); + init_waitqueue_head(&journal->j_wait_checkpoint); + init_waitqueue_head(&journal->j_wait_commit); + init_waitqueue_head(&journal->j_wait_updates); + mutex_init(&journal->j_barrier); + mutex_init(&journal->j_checkpoint_mutex); + spin_lock_init(&journal->j_revoke_lock); + spin_lock_init(&journal->j_list_lock); + spin_lock_init(&journal->j_state_lock); + + journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE); + + /* The journal is marked for error until we succeed with recovery! */ + journal->j_flags = JBD2_ABORT; + + /* Set up a default-sized revoke table for the new mount. */ + err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); + if (err) { + kfree(journal); + goto fail; + } + return journal; +fail: + return NULL; +} + +/* jbd2_journal_init_dev and jbd2_journal_init_inode: + * + * Create a journal structure assigned some fixed set of disk blocks to + * the journal. We don't actually touch those disk blocks yet, but we + * need to set up all of the mapping information to tell the journaling + * system where the journal blocks are. + * + */ + +/** + * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure + * @bdev: Block device on which to create the journal + * @fs_dev: Device which hold journalled filesystem for this journal. + * @start: Block nr Start of journal. + * @len: Length of the journal in blocks. + * @blocksize: blocksize of journalling device + * @returns: a newly created journal_t * + * + * jbd2_journal_init_dev creates a journal which maps a fixed contiguous + * range of blocks on an arbitrary block device. + * + */ +journal_t * jbd2_journal_init_dev(struct block_device *bdev, + struct block_device *fs_dev, + unsigned long long start, int len, int blocksize) +{ + journal_t *journal = journal_init_common(); + struct buffer_head *bh; + int n; + + if (!journal) + return NULL; + + /* journal descriptor can store up to n blocks -bzzz */ + journal->j_blocksize = blocksize; + n = journal->j_blocksize / sizeof(journal_block_tag_t); + journal->j_wbufsize = n; + journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); + if (!journal->j_wbuf) { + printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + __FUNCTION__); + kfree(journal); + journal = NULL; + } + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; + journal->j_blk_offset = start; + journal->j_maxlen = len; + + bh = __getblk(journal->j_dev, start, journal->j_blocksize); + J_ASSERT(bh != NULL); + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; + + return journal; +} + +/** + * journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode. + * @inode: An inode to create the journal in + * + * jbd2_journal_init_inode creates a journal which maps an on-disk inode as + * the journal. The inode must exist already, must support bmap() and + * must have all data blocks preallocated. + */ +journal_t * jbd2_journal_init_inode (struct inode *inode) +{ + struct buffer_head *bh; + journal_t *journal = journal_init_common(); + int err; + int n; + unsigned long long blocknr; + + if (!journal) + return NULL; + + journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; + journal->j_inode = inode; + jbd_debug(1, + "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", + journal, inode->i_sb->s_id, inode->i_ino, + (long long) inode->i_size, + inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); + + journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; + journal->j_blocksize = inode->i_sb->s_blocksize; + + /* journal descriptor can store up to n blocks -bzzz */ + n = journal->j_blocksize / sizeof(journal_block_tag_t); + journal->j_wbufsize = n; + journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); + if (!journal->j_wbuf) { + printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + __FUNCTION__); + kfree(journal); + return NULL; + } + + err = jbd2_journal_bmap(journal, 0, &blocknr); + /* If that failed, give up */ + if (err) { + printk(KERN_ERR "%s: Cannnot locate journal superblock\n", + __FUNCTION__); + kfree(journal); + return NULL; + } + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + J_ASSERT(bh != NULL); + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; + + return journal; +} + +/* + * If the journal init or create aborts, we need to mark the journal + * superblock as being NULL to prevent the journal destroy from writing + * back a bogus superblock. + */ +static void journal_fail_superblock (journal_t *journal) +{ + struct buffer_head *bh = journal->j_sb_buffer; + brelse(bh); + journal->j_sb_buffer = NULL; +} + +/* + * Given a journal_t structure, initialise the various fields for + * startup of a new journaling session. We use this both when creating + * a journal, and after recovering an old journal to reset it for + * subsequent use. + */ + +static int journal_reset(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; + unsigned long long first, last; + + first = be32_to_cpu(sb->s_first); + last = be32_to_cpu(sb->s_maxlen); + + journal->j_first = first; + journal->j_last = last; + + journal->j_head = first; + journal->j_tail = first; + journal->j_free = last - first; + + journal->j_tail_sequence = journal->j_transaction_sequence; + journal->j_commit_sequence = journal->j_transaction_sequence - 1; + journal->j_commit_request = journal->j_commit_sequence; + + journal->j_max_transaction_buffers = journal->j_maxlen / 4; + + /* Add the dynamic fields and write it to disk. */ + jbd2_journal_update_superblock(journal, 1); + jbd2_journal_start_thread(journal); + return 0; +} + +/** + * int jbd2_journal_create() - Initialise the new journal file + * @journal: Journal to create. This structure must have been initialised + * + * Given a journal_t structure which tells us which disk blocks we can + * use, create a new journal superblock and initialise all of the + * journal fields from scratch. + **/ +int jbd2_journal_create(journal_t *journal) +{ + unsigned long long blocknr; + struct buffer_head *bh; + journal_superblock_t *sb; + int i, err; + + if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) { + printk (KERN_ERR "Journal length (%d blocks) too short.\n", + journal->j_maxlen); + journal_fail_superblock(journal); + return -EINVAL; + } + + if (journal->j_inode == NULL) { + /* + * We don't know what block to start at! + */ + printk(KERN_EMERG + "%s: creation of journal on external device!\n", + __FUNCTION__); + BUG(); + } + + /* Zero out the entire journal on disk. We cannot afford to + have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */ + jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); + for (i = 0; i < journal->j_maxlen; i++) { + err = jbd2_journal_bmap(journal, i, &blocknr); + if (err) + return err; + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + lock_buffer(bh); + memset (bh->b_data, 0, journal->j_blocksize); + BUFFER_TRACE(bh, "marking dirty"); + mark_buffer_dirty(bh); + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + __brelse(bh); + } + + sync_blockdev(journal->j_dev); + jbd_debug(1, "JBD: journal cleared.\n"); + + /* OK, fill in the initial static fields in the new superblock */ + sb = journal->j_superblock; + + sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); + + sb->s_blocksize = cpu_to_be32(journal->j_blocksize); + sb->s_maxlen = cpu_to_be32(journal->j_maxlen); + sb->s_first = cpu_to_be32(1); + + journal->j_transaction_sequence = 1; + + journal->j_flags &= ~JBD2_ABORT; + journal->j_format_version = 2; + + return journal_reset(journal); +} + +/** + * void jbd2_journal_update_superblock() - Update journal sb on disk. + * @journal: The journal to update. + * @wait: Set to '0' if you don't want to wait for IO completion. + * + * Update a journal's dynamic superblock fields and write it to disk, + * optionally waiting for the IO to complete. + */ +void jbd2_journal_update_superblock(journal_t *journal, int wait) +{ + journal_superblock_t *sb = journal->j_superblock; + struct buffer_head *bh = journal->j_sb_buffer; + + /* + * As a special case, if the on-disk copy is already marked as needing + * no recovery (s_start == 0) and there are no outstanding transactions + * in the filesystem, then we can safely defer the superblock update + * until the next commit by setting JBD2_FLUSHED. This avoids + * attempting a write to a potential-readonly device. + */ + if (sb->s_start == 0 && journal->j_tail_sequence == + journal->j_transaction_sequence) { + jbd_debug(1,"JBD: Skipping superblock update on recovered sb " + "(start %ld, seq %d, errno %d)\n", + journal->j_tail, journal->j_tail_sequence, + journal->j_errno); + goto out; + } + + spin_lock(&journal->j_state_lock); + jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", + journal->j_tail, journal->j_tail_sequence, journal->j_errno); + + sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); + sb->s_start = cpu_to_be32(journal->j_tail); + sb->s_errno = cpu_to_be32(journal->j_errno); + spin_unlock(&journal->j_state_lock); + + BUFFER_TRACE(bh, "marking dirty"); + mark_buffer_dirty(bh); + if (wait) + sync_dirty_buffer(bh); + else + ll_rw_block(SWRITE, 1, &bh); + +out: + /* If we have just flushed the log (by marking s_start==0), then + * any future commit will have to be careful to update the + * superblock again to re-record the true start of the log. */ + + spin_lock(&journal->j_state_lock); + if (sb->s_start) + journal->j_flags &= ~JBD2_FLUSHED; + else + journal->j_flags |= JBD2_FLUSHED; + spin_unlock(&journal->j_state_lock); +} + +/* + * Read the superblock for a given journal, performing initial + * validation of the format. + */ + +static int journal_get_superblock(journal_t *journal) +{ + struct buffer_head *bh; + journal_superblock_t *sb; + int err = -EIO; + + bh = journal->j_sb_buffer; + + J_ASSERT(bh != NULL); + if (!buffer_uptodate(bh)) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + printk (KERN_ERR + "JBD: IO error reading journal superblock\n"); + goto out; + } + } + + sb = journal->j_superblock; + + err = -EINVAL; + + if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || + sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { + printk(KERN_WARNING "JBD: no valid journal superblock found\n"); + goto out; + } + + switch(be32_to_cpu(sb->s_header.h_blocktype)) { + case JBD2_SUPERBLOCK_V1: + journal->j_format_version = 1; + break; + case JBD2_SUPERBLOCK_V2: + journal->j_format_version = 2; + break; + default: + printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); + goto out; + } + + if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) + journal->j_maxlen = be32_to_cpu(sb->s_maxlen); + else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { + printk (KERN_WARNING "JBD: journal file too short\n"); + goto out; + } + + return 0; + +out: + journal_fail_superblock(journal); + return err; +} + +/* + * Load the on-disk journal superblock and read the key fields into the + * journal_t. + */ + +static int load_superblock(journal_t *journal) +{ + int err; + journal_superblock_t *sb; + + err = journal_get_superblock(journal); + if (err) + return err; + + sb = journal->j_superblock; + + journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); + journal->j_tail = be32_to_cpu(sb->s_start); + journal->j_first = be32_to_cpu(sb->s_first); + journal->j_last = be32_to_cpu(sb->s_maxlen); + journal->j_errno = be32_to_cpu(sb->s_errno); + + return 0; +} + + +/** + * int jbd2_journal_load() - Read journal from disk. + * @journal: Journal to act on. + * + * Given a journal_t structure which tells us which disk blocks contain + * a journal, read the journal from disk to initialise the in-memory + * structures. + */ +int jbd2_journal_load(journal_t *journal) +{ + int err; + journal_superblock_t *sb; + + err = load_superblock(journal); + if (err) + return err; + + sb = journal->j_superblock; + /* If this is a V2 superblock, then we have to check the + * features flags on it. */ + + if (journal->j_format_version >= 2) { + if ((sb->s_feature_ro_compat & + ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || + (sb->s_feature_incompat & + ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { + printk (KERN_WARNING + "JBD: Unrecognised features on journal\n"); + return -EINVAL; + } + } + + /* + * Create a slab for this blocksize + */ + err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); + if (err) + return err; + + /* Let the recovery code check whether it needs to recover any + * data from the journal. */ + if (jbd2_journal_recover(journal)) + goto recovery_error; + + /* OK, we've finished with the dynamic journal bits: + * reinitialise the dynamic contents of the superblock in memory + * and reset them on disk. */ + if (journal_reset(journal)) + goto recovery_error; + + journal->j_flags &= ~JBD2_ABORT; + journal->j_flags |= JBD2_LOADED; + return 0; + +recovery_error: + printk (KERN_WARNING "JBD: recovery failed\n"); + return -EIO; +} + +/** + * void jbd2_journal_destroy() - Release a journal_t structure. + * @journal: Journal to act on. + * + * Release a journal_t structure once it is no longer in use by the + * journaled object. + */ +void jbd2_journal_destroy(journal_t *journal) +{ + /* Wait for the commit thread to wake up and die. */ + journal_kill_thread(journal); + + /* Force a final log commit */ + if (journal->j_running_transaction) + jbd2_journal_commit_transaction(journal); + + /* Force any old transactions to disk */ + + /* Totally anal locking here... */ + spin_lock(&journal->j_list_lock); + while (journal->j_checkpoint_transactions != NULL) { + spin_unlock(&journal->j_list_lock); + jbd2_log_do_checkpoint(journal); + spin_lock(&journal->j_list_lock); + } + + J_ASSERT(journal->j_running_transaction == NULL); + J_ASSERT(journal->j_committing_transaction == NULL); + J_ASSERT(journal->j_checkpoint_transactions == NULL); + spin_unlock(&journal->j_list_lock); + + /* We can now mark the journal as empty. */ + journal->j_tail = 0; + journal->j_tail_sequence = ++journal->j_transaction_sequence; + if (journal->j_sb_buffer) { + jbd2_journal_update_superblock(journal, 1); + brelse(journal->j_sb_buffer); + } + + if (journal->j_inode) + iput(journal->j_inode); + if (journal->j_revoke) + jbd2_journal_destroy_revoke(journal); + kfree(journal->j_wbuf); + kfree(journal); +} + + +/** + *int jbd2_journal_check_used_features () - Check if features specified are used. + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * + * Check whether the journal uses all of a given set of + * features. Return true (non-zero) if it does. + **/ + +int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) +{ + journal_superblock_t *sb; + + if (!compat && !ro && !incompat) + return 1; + if (journal->j_format_version == 1) + return 0; + + sb = journal->j_superblock; + + if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && + ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && + ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) + return 1; + + return 0; +} + +/** + * int jbd2_journal_check_available_features() - Check feature set in journalling layer + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * + * Check whether the journaling code supports the use of + * all of a given set of features on this journal. Return true + * (non-zero) if it can. */ + +int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) +{ + journal_superblock_t *sb; + + if (!compat && !ro && !incompat) + return 1; + + sb = journal->j_superblock; + + /* We can support any known requested features iff the + * superblock is in version 2. Otherwise we fail to support any + * extended sb features. */ + + if (journal->j_format_version != 2) + return 0; + + if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat && + (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro && + (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat) + return 1; + + return 0; +} + +/** + * int jbd2_journal_set_features () - Mark a given journal feature in the superblock + * @journal: Journal to act on. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * + * Mark a given journal feature as present on the + * superblock. Returns true if the requested features could be set. + * + */ + +int jbd2_journal_set_features (journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) +{ + journal_superblock_t *sb; + + if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) + return 1; + + if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) + return 0; + + jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", + compat, ro, incompat); + + sb = journal->j_superblock; + + sb->s_feature_compat |= cpu_to_be32(compat); + sb->s_feature_ro_compat |= cpu_to_be32(ro); + sb->s_feature_incompat |= cpu_to_be32(incompat); + + return 1; +} + + +/** + * int jbd2_journal_update_format () - Update on-disk journal structure. + * @journal: Journal to act on. + * + * Given an initialised but unloaded journal struct, poke about in the + * on-disk structure to update it to the most recent supported version. + */ +int jbd2_journal_update_format (journal_t *journal) +{ + journal_superblock_t *sb; + int err; + + err = journal_get_superblock(journal); + if (err) + return err; + + sb = journal->j_superblock; + + switch (be32_to_cpu(sb->s_header.h_blocktype)) { + case JBD2_SUPERBLOCK_V2: + return 0; + case JBD2_SUPERBLOCK_V1: + return journal_convert_superblock_v1(journal, sb); + default: + break; + } + return -EINVAL; +} + +static int journal_convert_superblock_v1(journal_t *journal, + journal_superblock_t *sb) +{ + int offset, blocksize; + struct buffer_head *bh; + + printk(KERN_WARNING + "JBD: Converting superblock from version 1 to 2.\n"); + + /* Pre-initialise new fields to zero */ + offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); + blocksize = be32_to_cpu(sb->s_blocksize); + memset(&sb->s_feature_compat, 0, blocksize-offset); + + sb->s_nr_users = cpu_to_be32(1); + sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); + journal->j_format_version = 2; + + bh = journal->j_sb_buffer; + BUFFER_TRACE(bh, "marking dirty"); + mark_buffer_dirty(bh); + sync_dirty_buffer(bh); + return 0; +} + + +/** + * int jbd2_journal_flush () - Flush journal + * @journal: Journal to act on. + * + * Flush all data for a given journal to disk and empty the journal. + * Filesystems can use this when remounting readonly to ensure that + * recovery does not need to happen on remount. + */ + +int jbd2_journal_flush(journal_t *journal) +{ + int err = 0; + transaction_t *transaction = NULL; + unsigned long old_tail; + + spin_lock(&journal->j_state_lock); + + /* Force everything buffered to the log... */ + if (journal->j_running_transaction) { + transaction = journal->j_running_transaction; + __jbd2_log_start_commit(journal, transaction->t_tid); + } else if (journal->j_committing_transaction) + transaction = journal->j_committing_transaction; + + /* Wait for the log commit to complete... */ + if (transaction) { + tid_t tid = transaction->t_tid; + + spin_unlock(&journal->j_state_lock); + jbd2_log_wait_commit(journal, tid); + } else { + spin_unlock(&journal->j_state_lock); + } + + /* ...and flush everything in the log out to disk. */ + spin_lock(&journal->j_list_lock); + while (!err && journal->j_checkpoint_transactions != NULL) { + spin_unlock(&journal->j_list_lock); + err = jbd2_log_do_checkpoint(journal); + spin_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); + jbd2_cleanup_journal_tail(journal); + + /* Finally, mark the journal as really needing no recovery. + * This sets s_start==0 in the underlying superblock, which is + * the magic code for a fully-recovered superblock. Any future + * commits of data to the journal will restore the current + * s_start value. */ + spin_lock(&journal->j_state_lock); + old_tail = journal->j_tail; + journal->j_tail = 0; + spin_unlock(&journal->j_state_lock); + jbd2_journal_update_superblock(journal, 1); + spin_lock(&journal->j_state_lock); + journal->j_tail = old_tail; + + J_ASSERT(!journal->j_running_transaction); + J_ASSERT(!journal->j_committing_transaction); + J_ASSERT(!journal->j_checkpoint_transactions); + J_ASSERT(journal->j_head == journal->j_tail); + J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); + spin_unlock(&journal->j_state_lock); + return err; +} + +/** + * int jbd2_journal_wipe() - Wipe journal contents + * @journal: Journal to act on. + * @write: flag (see below) + * + * Wipe out all of the contents of a journal, safely. This will produce + * a warning if the journal contains any valid recovery information. + * Must be called between journal_init_*() and jbd2_journal_load(). + * + * If 'write' is non-zero, then we wipe out the journal on disk; otherwise + * we merely suppress recovery. + */ + +int jbd2_journal_wipe(journal_t *journal, int write) +{ + journal_superblock_t *sb; + int err = 0; + + J_ASSERT (!(journal->j_flags & JBD2_LOADED)); + + err = load_superblock(journal); + if (err) + return err; + + sb = journal->j_superblock; + + if (!journal->j_tail) + goto no_recovery; + + printk (KERN_WARNING "JBD: %s recovery information on journal\n", + write ? "Clearing" : "Ignoring"); + + err = jbd2_journal_skip_recovery(journal); + if (write) + jbd2_journal_update_superblock(journal, 1); + + no_recovery: + return err; +} + +/* + * journal_dev_name: format a character string to describe on what + * device this journal is present. + */ + +static const char *journal_dev_name(journal_t *journal, char *buffer) +{ + struct block_device *bdev; + + if (journal->j_inode) + bdev = journal->j_inode->i_sb->s_bdev; + else + bdev = journal->j_dev; + + return bdevname(bdev, buffer); +} + +/* + * Journal abort has very specific semantics, which we describe + * for journal abort. + * + * Two internal function, which provide abort to te jbd layer + * itself are here. + */ + +/* + * Quick version for internal journal use (doesn't lock the journal). + * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, + * and don't attempt to make any other journal updates. + */ +void __jbd2_journal_abort_hard(journal_t *journal) +{ + transaction_t *transaction; + char b[BDEVNAME_SIZE]; + + if (journal->j_flags & JBD2_ABORT) + return; + + printk(KERN_ERR "Aborting journal on device %s.\n", + journal_dev_name(journal, b)); + + spin_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_ABORT; + transaction = journal->j_running_transaction; + if (transaction) + __jbd2_log_start_commit(journal, transaction->t_tid); + spin_unlock(&journal->j_state_lock); +} + +/* Soft abort: record the abort error status in the journal superblock, + * but don't do any other IO. */ +static void __journal_abort_soft (journal_t *journal, int errno) +{ + if (journal->j_flags & JBD2_ABORT) + return; + + if (!journal->j_errno) + journal->j_errno = errno; + + __jbd2_journal_abort_hard(journal); + + if (errno) + jbd2_journal_update_superblock(journal, 1); +} + +/** + * void jbd2_journal_abort () - Shutdown the journal immediately. + * @journal: the journal to shutdown. + * @errno: an error number to record in the journal indicating + * the reason for the shutdown. + * + * Perform a complete, immediate shutdown of the ENTIRE + * journal (not of a single transaction). This operation cannot be + * undone without closing and reopening the journal. + * + * The jbd2_journal_abort function is intended to support higher level error + * recovery mechanisms such as the ext2/ext3 remount-readonly error + * mode. + * + * Journal abort has very specific semantics. Any existing dirty, + * unjournaled buffers in the main filesystem will still be written to + * disk by bdflush, but the journaling mechanism will be suspended + * immediately and no further transaction commits will be honoured. + * + * Any dirty, journaled buffers will be written back to disk without + * hitting the journal. Atomicity cannot be guaranteed on an aborted + * filesystem, but we _do_ attempt to leave as much data as possible + * behind for fsck to use for cleanup. + * + * Any attempt to get a new transaction handle on a journal which is in + * ABORT state will just result in an -EROFS error return. A + * jbd2_journal_stop on an existing handle will return -EIO if we have + * entered abort state during the update. + * + * Recursive transactions are not disturbed by journal abort until the + * final jbd2_journal_stop, which will receive the -EIO error. + * + * Finally, the jbd2_journal_abort call allows the caller to supply an errno + * which will be recorded (if possible) in the journal superblock. This + * allows a client to record failure conditions in the middle of a + * transaction without having to complete the transaction to record the + * failure to disk. ext3_error, for example, now uses this + * functionality. + * + * Errors which originate from within the journaling layer will NOT + * supply an errno; a null errno implies that absolutely no further + * writes are done to the journal (unless there are any already in + * progress). + * + */ + +void jbd2_journal_abort(journal_t *journal, int errno) +{ + __journal_abort_soft(journal, errno); +} + +/** + * int jbd2_journal_errno () - returns the journal's error state. + * @journal: journal to examine. + * + * This is the errno numbet set with jbd2_journal_abort(), the last + * time the journal was mounted - if the journal was stopped + * without calling abort this will be 0. + * + * If the journal has been aborted on this mount time -EROFS will + * be returned. + */ +int jbd2_journal_errno(journal_t *journal) +{ + int err; + + spin_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_ABORT) + err = -EROFS; + else + err = journal->j_errno; + spin_unlock(&journal->j_state_lock); + return err; +} + +/** + * int jbd2_journal_clear_err () - clears the journal's error state + * @journal: journal to act on. + * + * An error must be cleared or Acked to take a FS out of readonly + * mode. + */ +int jbd2_journal_clear_err(journal_t *journal) +{ + int err = 0; + + spin_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_ABORT) + err = -EROFS; + else + journal->j_errno = 0; + spin_unlock(&journal->j_state_lock); + return err; +} + +/** + * void jbd2_journal_ack_err() - Ack journal err. + * @journal: journal to act on. + * + * An error must be cleared or Acked to take a FS out of readonly + * mode. + */ +void jbd2_journal_ack_err(journal_t *journal) +{ + spin_lock(&journal->j_state_lock); + if (journal->j_errno) + journal->j_flags |= JBD2_ACK_ERR; + spin_unlock(&journal->j_state_lock); +} + +int jbd2_journal_blocks_per_page(struct inode *inode) +{ + return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); +} + +/* + * helper functions to deal with 32 or 64bit block numbers. + */ +size_t journal_tag_bytes(journal_t *journal) +{ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + return JBD_TAG_SIZE64; + else + return JBD_TAG_SIZE32; +} + +/* + * Simple support for retrying memory allocations. Introduced to help to + * debug different VM deadlock avoidance strategies. + */ +void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry) +{ + return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); +} + +/* + * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed + * and allocate frozen and commit buffers from these slabs. + * + * Reason for doing this is to avoid, SLAB_DEBUG - since it could + * cause bh to cross page boundary. + */ + +#define JBD_MAX_SLABS 5 +#define JBD_SLAB_INDEX(size) (size >> 11) + +static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static const char *jbd_slab_names[JBD_MAX_SLABS] = { + "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k" +}; + +static void jbd2_journal_destroy_jbd_slabs(void) +{ + int i; + + for (i = 0; i < JBD_MAX_SLABS; i++) { + if (jbd_slab[i]) + kmem_cache_destroy(jbd_slab[i]); + jbd_slab[i] = NULL; + } +} + +static int jbd2_journal_create_jbd_slab(size_t slab_size) +{ + int i = JBD_SLAB_INDEX(slab_size); + + BUG_ON(i >= JBD_MAX_SLABS); + + /* + * Check if we already have a slab created for this size + */ + if (jbd_slab[i]) + return 0; + + /* + * Create a slab and force alignment to be same as slabsize - + * this will make sure that allocations won't cross the page + * boundary. + */ + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], + slab_size, slab_size, 0, NULL, NULL); + if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +void * jbd2_slab_alloc(size_t size, gfp_t flags) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); +} + +void jbd2_slab_free(void *ptr, size_t size) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); +} + +/* + * Journal_head storage management + */ +static kmem_cache_t *jbd2_journal_head_cache; +#ifdef CONFIG_JBD_DEBUG +static atomic_t nr_journal_heads = ATOMIC_INIT(0); +#endif + +static int journal_init_jbd2_journal_head_cache(void) +{ + int retval; + + J_ASSERT(jbd2_journal_head_cache == 0); + jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", + sizeof(struct journal_head), + 0, /* offset */ + 0, /* flags */ + NULL, /* ctor */ + NULL); /* dtor */ + retval = 0; + if (jbd2_journal_head_cache == 0) { + retval = -ENOMEM; + printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); + } + return retval; +} + +static void jbd2_journal_destroy_jbd2_journal_head_cache(void) +{ + J_ASSERT(jbd2_journal_head_cache != NULL); + kmem_cache_destroy(jbd2_journal_head_cache); + jbd2_journal_head_cache = NULL; +} + +/* + * journal_head splicing and dicing + */ +static struct journal_head *journal_alloc_journal_head(void) +{ + struct journal_head *ret; + static unsigned long last_warning; + +#ifdef CONFIG_JBD_DEBUG + atomic_inc(&nr_journal_heads); +#endif + ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); + if (ret == 0) { + jbd_debug(1, "out of memory for journal_head\n"); + if (time_after(jiffies, last_warning + 5*HZ)) { + printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", + __FUNCTION__); + last_warning = jiffies; + } + while (ret == 0) { + yield(); + ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); + } + } + return ret; +} + +static void journal_free_journal_head(struct journal_head *jh) +{ +#ifdef CONFIG_JBD_DEBUG + atomic_dec(&nr_journal_heads); + memset(jh, JBD_POISON_FREE, sizeof(*jh)); +#endif + kmem_cache_free(jbd2_journal_head_cache, jh); +} + +/* + * A journal_head is attached to a buffer_head whenever JBD has an + * interest in the buffer. + * + * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit + * is set. This bit is tested in core kernel code where we need to take + * JBD-specific actions. Testing the zeroness of ->b_private is not reliable + * there. + * + * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. + * + * When a buffer has its BH_JBD bit set it is immune from being released by + * core kernel code, mainly via ->b_count. + * + * A journal_head may be detached from its buffer_head when the journal_head's + * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. + * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the + * journal_head can be dropped if needed. + * + * Various places in the kernel want to attach a journal_head to a buffer_head + * _before_ attaching the journal_head to a transaction. To protect the + * journal_head in this situation, jbd2_journal_add_journal_head elevates the + * journal_head's b_jcount refcount by one. The caller must call + * jbd2_journal_put_journal_head() to undo this. + * + * So the typical usage would be: + * + * (Attach a journal_head if needed. Increments b_jcount) + * struct journal_head *jh = jbd2_journal_add_journal_head(bh); + * ... + * jh->b_transaction = xxx; + * jbd2_journal_put_journal_head(jh); + * + * Now, the journal_head's b_jcount is zero, but it is safe from being released + * because it has a non-zero b_transaction. + */ + +/* + * Give a buffer_head a journal_head. + * + * Doesn't need the journal lock. + * May sleep. + */ +struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) +{ + struct journal_head *jh; + struct journal_head *new_jh = NULL; + +repeat: + if (!buffer_jbd(bh)) { + new_jh = journal_alloc_journal_head(); + memset(new_jh, 0, sizeof(*new_jh)); + } + + jbd_lock_bh_journal_head(bh); + if (buffer_jbd(bh)) { + jh = bh2jh(bh); + } else { + J_ASSERT_BH(bh, + (atomic_read(&bh->b_count) > 0) || + (bh->b_page && bh->b_page->mapping)); + + if (!new_jh) { + jbd_unlock_bh_journal_head(bh); + goto repeat; + } + + jh = new_jh; + new_jh = NULL; /* We consumed it */ + set_buffer_jbd(bh); + bh->b_private = jh; + jh->b_bh = bh; + get_bh(bh); + BUFFER_TRACE(bh, "added journal_head"); + } + jh->b_jcount++; + jbd_unlock_bh_journal_head(bh); + if (new_jh) + journal_free_journal_head(new_jh); + return bh->b_private; +} + +/* + * Grab a ref against this buffer_head's journal_head. If it ended up not + * having a journal_head, return NULL + */ +struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) +{ + struct journal_head *jh = NULL; + + jbd_lock_bh_journal_head(bh); + if (buffer_jbd(bh)) { + jh = bh2jh(bh); + jh->b_jcount++; + } + jbd_unlock_bh_journal_head(bh); + return jh; +} + +static void __journal_remove_journal_head(struct buffer_head *bh) +{ + struct journal_head *jh = bh2jh(bh); + + J_ASSERT_JH(jh, jh->b_jcount >= 0); + + get_bh(bh); + if (jh->b_jcount == 0) { + if (jh->b_transaction == NULL && + jh->b_next_transaction == NULL && + jh->b_cp_transaction == NULL) { + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing " + "b_frozen_data\n", + __FUNCTION__); + jbd2_slab_free(jh->b_frozen_data, bh->b_size); + } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing " + "b_committed_data\n", + __FUNCTION__); + jbd2_slab_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + __brelse(bh); + journal_free_journal_head(jh); + } else { + BUFFER_TRACE(bh, "journal_head was locked"); + } + } +} + +/* + * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction + * and has a zero b_jcount then remove and release its journal_head. If we did + * see that the buffer is not used by any transaction we also "logically" + * decrement ->b_count. + * + * We in fact take an additional increment on ->b_count as a convenience, + * because the caller usually wants to do additional things with the bh + * after calling here. + * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some + * time. Once the caller has run __brelse(), the buffer is eligible for + * reaping by try_to_free_buffers(). + */ +void jbd2_journal_remove_journal_head(struct buffer_head *bh) +{ + jbd_lock_bh_journal_head(bh); + __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); +} + +/* + * Drop a reference on the passed journal_head. If it fell to zero then try to + * release the journal_head from the buffer_head. + */ +void jbd2_journal_put_journal_head(struct journal_head *jh) +{ + struct buffer_head *bh = jh2bh(jh); + + jbd_lock_bh_journal_head(bh); + J_ASSERT_JH(jh, jh->b_jcount > 0); + --jh->b_jcount; + if (!jh->b_jcount && !jh->b_transaction) { + __journal_remove_journal_head(bh); + __brelse(bh); + } + jbd_unlock_bh_journal_head(bh); +} + +/* + * /proc tunables + */ +#if defined(CONFIG_JBD_DEBUG) +int jbd2_journal_enable_debug; +EXPORT_SYMBOL(jbd2_journal_enable_debug); +#endif + +#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) + +static struct proc_dir_entry *proc_jbd_debug; + +static int read_jbd_debug(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int ret; + + ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug); + *eof = 1; + return ret; +} + +static int write_jbd_debug(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char buf[32]; + + if (count > ARRAY_SIZE(buf) - 1) + count = ARRAY_SIZE(buf) - 1; + if (copy_from_user(buf, buffer, count)) + return -EFAULT; + buf[ARRAY_SIZE(buf) - 1] = '\0'; + jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10); + return count; +} + +#define JBD_PROC_NAME "sys/fs/jbd2-debug" + +static void __init create_jbd_proc_entry(void) +{ + proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL); + if (proc_jbd_debug) { + /* Why is this so hard? */ + proc_jbd_debug->read_proc = read_jbd_debug; + proc_jbd_debug->write_proc = write_jbd_debug; + } +} + +static void __exit jbd2_remove_jbd_proc_entry(void) +{ + if (proc_jbd_debug) + remove_proc_entry(JBD_PROC_NAME, NULL); +} + +#else + +#define create_jbd_proc_entry() do {} while (0) +#define jbd2_remove_jbd_proc_entry() do {} while (0) + +#endif + +kmem_cache_t *jbd2_handle_cache; + +static int __init journal_init_handle_cache(void) +{ + jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", + sizeof(handle_t), + 0, /* offset */ + 0, /* flags */ + NULL, /* ctor */ + NULL); /* dtor */ + if (jbd2_handle_cache == NULL) { + printk(KERN_EMERG "JBD: failed to create handle cache\n"); + return -ENOMEM; + } + return 0; +} + +static void jbd2_journal_destroy_handle_cache(void) +{ + if (jbd2_handle_cache) + kmem_cache_destroy(jbd2_handle_cache); +} + +/* + * Module startup and shutdown + */ + +static int __init journal_init_caches(void) +{ + int ret; + + ret = jbd2_journal_init_revoke_caches(); + if (ret == 0) + ret = journal_init_jbd2_journal_head_cache(); + if (ret == 0) + ret = journal_init_handle_cache(); + return ret; +} + +static void jbd2_journal_destroy_caches(void) +{ + jbd2_journal_destroy_revoke_caches(); + jbd2_journal_destroy_jbd2_journal_head_cache(); + jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_jbd_slabs(); +} + +static int __init journal_init(void) +{ + int ret; + + BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); + + ret = journal_init_caches(); + if (ret != 0) + jbd2_journal_destroy_caches(); + create_jbd_proc_entry(); + return ret; +} + +static void __exit journal_exit(void) +{ +#ifdef CONFIG_JBD_DEBUG + int n = atomic_read(&nr_journal_heads); + if (n) + printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); +#endif + jbd2_remove_jbd_proc_entry(); + jbd2_journal_destroy_caches(); +} + +MODULE_LICENSE("GPL"); +module_init(journal_init); +module_exit(journal_exit); + diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c new file mode 100644 index 000000000000..9f10acafaf70 --- /dev/null +++ b/fs/jbd2/recovery.c @@ -0,0 +1,609 @@ +/* + * linux/fs/recovery.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1999-2000 Red Hat Software --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Journal recovery routines for the generic filesystem journaling code; + * part of the ext2fs journaling system. + */ + +#ifndef __KERNEL__ +#include "jfs_user.h" +#else +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/errno.h> +#include <linux/slab.h> +#endif + +/* + * Maintain information about the progress of the recovery job, so that + * the different passes can carry information between them. + */ +struct recovery_info +{ + tid_t start_transaction; + tid_t end_transaction; + + int nr_replays; + int nr_revokes; + int nr_revoke_hits; +}; + +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; +static int do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass); +static int scan_revoke_records(journal_t *, struct buffer_head *, + tid_t, struct recovery_info *); + +#ifdef __KERNEL__ + +/* Release readahead buffers after use */ +static void journal_brelse_array(struct buffer_head *b[], int n) +{ + while (--n >= 0) + brelse (b[n]); +} + + +/* + * When reading from the journal, we are going through the block device + * layer directly and so there is no readahead being done for us. We + * need to implement any readahead ourselves if we want it to happen at + * all. Recovery is basically one long sequential read, so make sure we + * do the IO in reasonably large chunks. + * + * This is not so critical that we need to be enormously clever about + * the readahead size, though. 128K is a purely arbitrary, good-enough + * fixed value. + */ + +#define MAXBUF 8 +static int do_readahead(journal_t *journal, unsigned int start) +{ + int err; + unsigned int max, nbufs, next; + unsigned long long blocknr; + struct buffer_head *bh; + + struct buffer_head * bufs[MAXBUF]; + + /* Do up to 128K of readahead */ + max = start + (128 * 1024 / journal->j_blocksize); + if (max > journal->j_maxlen) + max = journal->j_maxlen; + + /* Do the readahead itself. We'll submit MAXBUF buffer_heads at + * a time to the block device IO layer. */ + + nbufs = 0; + + for (next = start; next < max; next++) { + err = jbd2_journal_bmap(journal, next, &blocknr); + + if (err) { + printk (KERN_ERR "JBD: bad block at offset %u\n", + next); + goto failed; + } + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) { + err = -ENOMEM; + goto failed; + } + + if (!buffer_uptodate(bh) && !buffer_locked(bh)) { + bufs[nbufs++] = bh; + if (nbufs == MAXBUF) { + ll_rw_block(READ, nbufs, bufs); + journal_brelse_array(bufs, nbufs); + nbufs = 0; + } + } else + brelse(bh); + } + + if (nbufs) + ll_rw_block(READ, nbufs, bufs); + err = 0; + +failed: + if (nbufs) + journal_brelse_array(bufs, nbufs); + return err; +} + +#endif /* __KERNEL__ */ + + +/* + * Read a block from the journal + */ + +static int jread(struct buffer_head **bhp, journal_t *journal, + unsigned int offset) +{ + int err; + unsigned long long blocknr; + struct buffer_head *bh; + + *bhp = NULL; + + if (offset >= journal->j_maxlen) { + printk(KERN_ERR "JBD: corrupted journal superblock\n"); + return -EIO; + } + + err = jbd2_journal_bmap(journal, offset, &blocknr); + + if (err) { + printk (KERN_ERR "JBD: bad block at offset %u\n", + offset); + return err; + } + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) + return -ENOMEM; + + if (!buffer_uptodate(bh)) { + /* If this is a brand new buffer, start readahead. + Otherwise, we assume we are already reading it. */ + if (!buffer_req(bh)) + do_readahead(journal, offset); + wait_on_buffer(bh); + } + + if (!buffer_uptodate(bh)) { + printk (KERN_ERR "JBD: Failed to read block at offset %u\n", + offset); + brelse(bh); + return -EIO; + } + + *bhp = bh; + return 0; +} + + +/* + * Count the number of in-use tags in a journal descriptor block. + */ + +static int count_tags(journal_t *journal, struct buffer_head *bh) +{ + char * tagp; + journal_block_tag_t * tag; + int nr = 0, size = journal->j_blocksize; + int tag_bytes = journal_tag_bytes(journal); + + tagp = &bh->b_data[sizeof(journal_header_t)]; + + while ((tagp - bh->b_data + tag_bytes) <= size) { + tag = (journal_block_tag_t *) tagp; + + nr++; + tagp += tag_bytes; + if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) + tagp += 16; + + if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) + break; + } + + return nr; +} + + +/* Make sure we wrap around the log correctly! */ +#define wrap(journal, var) \ +do { \ + if (var >= (journal)->j_last) \ + var -= ((journal)->j_last - (journal)->j_first); \ +} while (0) + +/** + * jbd2_journal_recover - recovers a on-disk journal + * @journal: the journal to recover + * + * The primary function for recovering the log contents when mounting a + * journaled device. + * + * Recovery is done in three passes. In the first pass, we look for the + * end of the log. In the second, we assemble the list of revoke + * blocks. In the third and final pass, we replay any un-revoked blocks + * in the log. + */ +int jbd2_journal_recover(journal_t *journal) +{ + int err; + journal_superblock_t * sb; + + struct recovery_info info; + + memset(&info, 0, sizeof(info)); + sb = journal->j_superblock; + + /* + * The journal superblock's s_start field (the current log head) + * is always zero if, and only if, the journal was cleanly + * unmounted. + */ + + if (!sb->s_start) { + jbd_debug(1, "No recovery required, last transaction %d\n", + be32_to_cpu(sb->s_sequence)); + journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; + return 0; + } + + err = do_one_pass(journal, &info, PASS_SCAN); + if (!err) + err = do_one_pass(journal, &info, PASS_REVOKE); + if (!err) + err = do_one_pass(journal, &info, PASS_REPLAY); + + jbd_debug(0, "JBD: recovery, exit status %d, " + "recovered transactions %u to %u\n", + err, info.start_transaction, info.end_transaction); + jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", + info.nr_replays, info.nr_revoke_hits, info.nr_revokes); + + /* Restart the log at the next transaction ID, thus invalidating + * any existing commit records in the log. */ + journal->j_transaction_sequence = ++info.end_transaction; + + jbd2_journal_clear_revoke(journal); + sync_blockdev(journal->j_fs_dev); + return err; +} + +/** + * jbd2_journal_skip_recovery - Start journal and wipe exiting records + * @journal: journal to startup + * + * Locate any valid recovery information from the journal and set up the + * journal structures in memory to ignore it (presumably because the + * caller has evidence that it is out of date). + * This function does'nt appear to be exorted.. + * + * We perform one pass over the journal to allow us to tell the user how + * much recovery information is being erased, and to let us initialise + * the journal transaction sequence numbers to the next unused ID. + */ +int jbd2_journal_skip_recovery(journal_t *journal) +{ + int err; + journal_superblock_t * sb; + + struct recovery_info info; + + memset (&info, 0, sizeof(info)); + sb = journal->j_superblock; + + err = do_one_pass(journal, &info, PASS_SCAN); + + if (err) { + printk(KERN_ERR "JBD: error %d scanning journal\n", err); + ++journal->j_transaction_sequence; + } else { +#ifdef CONFIG_JBD_DEBUG + int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); +#endif + jbd_debug(0, + "JBD: ignoring %d transaction%s from the journal.\n", + dropped, (dropped == 1) ? "" : "s"); + journal->j_transaction_sequence = ++info.end_transaction; + } + + journal->j_tail = 0; + return err; +} + +static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +{ + unsigned long long block = be32_to_cpu(tag->t_blocknr); + if (tag_bytes > JBD_TAG_SIZE32) + block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; + return block; +} + +static int do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass) +{ + unsigned int first_commit_ID, next_commit_ID; + unsigned long next_log_block; + int err, success = 0; + journal_superblock_t * sb; + journal_header_t * tmp; + struct buffer_head * bh; + unsigned int sequence; + int blocktype; + int tag_bytes = journal_tag_bytes(journal); + + /* Precompute the maximum metadata descriptors in a descriptor block */ + int MAX_BLOCKS_PER_DESC; + MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) + / tag_bytes); + + /* + * First thing is to establish what we expect to find in the log + * (in terms of transaction IDs), and where (in terms of log + * block offsets): query the superblock. + */ + + sb = journal->j_superblock; + next_commit_ID = be32_to_cpu(sb->s_sequence); + next_log_block = be32_to_cpu(sb->s_start); + + first_commit_ID = next_commit_ID; + if (pass == PASS_SCAN) + info->start_transaction = first_commit_ID; + + jbd_debug(1, "Starting recovery pass %d\n", pass); + + /* + * Now we walk through the log, transaction by transaction, + * making sure that each transaction has a commit block in the + * expected place. Each complete transaction gets replayed back + * into the main filesystem. + */ + + while (1) { + int flags; + char * tagp; + journal_block_tag_t * tag; + struct buffer_head * obh; + struct buffer_head * nbh; + + cond_resched(); /* We're under lock_kernel() */ + + /* If we already know where to stop the log traversal, + * check right now that we haven't gone past the end of + * the log. */ + + if (pass != PASS_SCAN) + if (tid_geq(next_commit_ID, info->end_transaction)) + break; + + jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", + next_commit_ID, next_log_block, journal->j_last); + + /* Skip over each chunk of the transaction looking + * either the next descriptor block or the final commit + * record. */ + + jbd_debug(3, "JBD: checking block %ld\n", next_log_block); + err = jread(&bh, journal, next_log_block); + if (err) + goto failed; + + next_log_block++; + wrap(journal, next_log_block); + + /* What kind of buffer is it? + * + * If it is a descriptor block, check that it has the + * expected sequence number. Otherwise, we're all done + * here. */ + + tmp = (journal_header_t *)bh->b_data; + + if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { + brelse(bh); + break; + } + + blocktype = be32_to_cpu(tmp->h_blocktype); + sequence = be32_to_cpu(tmp->h_sequence); + jbd_debug(3, "Found magic %d, sequence %d\n", + blocktype, sequence); + + if (sequence != next_commit_ID) { + brelse(bh); + break; + } + + /* OK, we have a valid descriptor block which matches + * all of the sequence number checks. What are we going + * to do with it? That depends on the pass... */ + + switch(blocktype) { + case JBD2_DESCRIPTOR_BLOCK: + /* If it is a valid descriptor block, replay it + * in pass REPLAY; otherwise, just skip over the + * blocks it describes. */ + if (pass != PASS_REPLAY) { + next_log_block += count_tags(journal, bh); + wrap(journal, next_log_block); + brelse(bh); + continue; + } + + /* A descriptor block: we can now write all of + * the data blocks. Yay, useful work is finally + * getting done here! */ + + tagp = &bh->b_data[sizeof(journal_header_t)]; + while ((tagp - bh->b_data + tag_bytes) + <= journal->j_blocksize) { + unsigned long io_block; + + tag = (journal_block_tag_t *) tagp; + flags = be32_to_cpu(tag->t_flags); + + io_block = next_log_block++; + wrap(journal, next_log_block); + err = jread(&obh, journal, io_block); + if (err) { + /* Recover what we can, but + * report failure at the end. */ + success = err; + printk (KERN_ERR + "JBD: IO error %d recovering " + "block %ld in log\n", + err, io_block); + } else { + unsigned long long blocknr; + + J_ASSERT(obh != NULL); + blocknr = read_tag_block(tag_bytes, + tag); + + /* If the block has been + * revoked, then we're all done + * here. */ + if (jbd2_journal_test_revoke + (journal, blocknr, + next_commit_ID)) { + brelse(obh); + ++info->nr_revoke_hits; + goto skip_write; + } + + /* Find a buffer for the new + * data being restored */ + nbh = __getblk(journal->j_fs_dev, + blocknr, + journal->j_blocksize); + if (nbh == NULL) { + printk(KERN_ERR + "JBD: Out of memory " + "during recovery.\n"); + err = -ENOMEM; + brelse(bh); + brelse(obh); + goto failed; + } + + lock_buffer(nbh); + memcpy(nbh->b_data, obh->b_data, + journal->j_blocksize); + if (flags & JBD2_FLAG_ESCAPE) { + *((__be32 *)bh->b_data) = + cpu_to_be32(JBD2_MAGIC_NUMBER); + } + + BUFFER_TRACE(nbh, "marking dirty"); + set_buffer_uptodate(nbh); + mark_buffer_dirty(nbh); + BUFFER_TRACE(nbh, "marking uptodate"); + ++info->nr_replays; + /* ll_rw_block(WRITE, 1, &nbh); */ + unlock_buffer(nbh); + brelse(obh); + brelse(nbh); + } + + skip_write: + tagp += tag_bytes; + if (!(flags & JBD2_FLAG_SAME_UUID)) + tagp += 16; + + if (flags & JBD2_FLAG_LAST_TAG) + break; + } + + brelse(bh); + continue; + + case JBD2_COMMIT_BLOCK: + /* Found an expected commit block: not much to + * do other than move on to the next sequence + * number. */ + brelse(bh); + next_commit_ID++; + continue; + + case JBD2_REVOKE_BLOCK: + /* If we aren't in the REVOKE pass, then we can + * just skip over this block. */ + if (pass != PASS_REVOKE) { + brelse(bh); + continue; + } + + err = scan_revoke_records(journal, bh, + next_commit_ID, info); + brelse(bh); + if (err) + goto failed; + continue; + + default: + jbd_debug(3, "Unrecognised magic %d, end of scan.\n", + blocktype); + brelse(bh); + goto done; + } + } + + done: + /* + * We broke out of the log scan loop: either we came to the + * known end of the log or we found an unexpected block in the + * log. If the latter happened, then we know that the "current" + * transaction marks the end of the valid log. + */ + + if (pass == PASS_SCAN) + info->end_transaction = next_commit_ID; + else { + /* It's really bad news if different passes end up at + * different places (but possible due to IO errors). */ + if (info->end_transaction != next_commit_ID) { + printk (KERN_ERR "JBD: recovery pass %d ended at " + "transaction %u, expected %u\n", + pass, next_commit_ID, info->end_transaction); + if (!success) + success = -EIO; + } + } + + return success; + + failed: + return err; +} + + +/* Scan a revoke record, marking all blocks mentioned as revoked. */ + +static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, + tid_t sequence, struct recovery_info *info) +{ + jbd2_journal_revoke_header_t *header; + int offset, max; + int record_len = 4; + + header = (jbd2_journal_revoke_header_t *) bh->b_data; + offset = sizeof(jbd2_journal_revoke_header_t); + max = be32_to_cpu(header->r_count); + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + record_len = 8; + + while (offset + record_len <= max) { + unsigned long long blocknr; + int err; + + if (record_len == 4) + blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); + else + blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); + offset += record_len; + err = jbd2_journal_set_revoke(journal, blocknr, sequence); + if (err) + return err; + ++info->nr_revokes; + } + return 0; +} diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c new file mode 100644 index 000000000000..380d19917f37 --- /dev/null +++ b/fs/jbd2/revoke.c @@ -0,0 +1,712 @@ +/* + * linux/fs/revoke.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 + * + * Copyright 2000 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Journal revoke routines for the generic filesystem journaling code; + * part of the ext2fs journaling system. + * + * Revoke is the mechanism used to prevent old log records for deleted + * metadata from being replayed on top of newer data using the same + * blocks. The revoke mechanism is used in two separate places: + * + * + Commit: during commit we write the entire list of the current + * transaction's revoked blocks to the journal + * + * + Recovery: during recovery we record the transaction ID of all + * revoked blocks. If there are multiple revoke records in the log + * for a single block, only the last one counts, and if there is a log + * entry for a block beyond the last revoke, then that log entry still + * gets replayed. + * + * We can get interactions between revokes and new log data within a + * single transaction: + * + * Block is revoked and then journaled: + * The desired end result is the journaling of the new block, so we + * cancel the revoke before the transaction commits. + * + * Block is journaled and then revoked: + * The revoke must take precedence over the write of the block, so we + * need either to cancel the journal entry or to write the revoke + * later in the log than the log block. In this case, we choose the + * latter: journaling a block cancels any revoke record for that block + * in the current transaction, so any revoke for that block in the + * transaction must have happened after the block was journaled and so + * the revoke must take precedence. + * + * Block is revoked and then written as data: + * The data write is allowed to succeed, but the revoke is _not_ + * cancelled. We still need to prevent old log records from + * overwriting the new data. We don't even need to clear the revoke + * bit here. + * + * Revoke information on buffers is a tri-state value: + * + * RevokeValid clear: no cached revoke status, need to look it up + * RevokeValid set, Revoked clear: + * buffer has not been revoked, and cancel_revoke + * need do nothing. + * RevokeValid set, Revoked set: + * buffer has been revoked. + */ + +#ifndef __KERNEL__ +#include "jfs_user.h" +#else +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#endif + +static kmem_cache_t *jbd2_revoke_record_cache; +static kmem_cache_t *jbd2_revoke_table_cache; + +/* Each revoke record represents one single revoked block. During + journal replay, this involves recording the transaction ID of the + last transaction to revoke this block. */ + +struct jbd2_revoke_record_s +{ + struct list_head hash; + tid_t sequence; /* Used for recovery only */ + unsigned long long blocknr; +}; + + +/* The revoke table is just a simple hash table of revoke records. */ +struct jbd2_revoke_table_s +{ + /* It is conceivable that we might want a larger hash table + * for recovery. Must be a power of two. */ + int hash_size; + int hash_shift; + struct list_head *hash_table; +}; + + +#ifdef __KERNEL__ +static void write_one_revoke_record(journal_t *, transaction_t *, + struct journal_head **, int *, + struct jbd2_revoke_record_s *); +static void flush_descriptor(journal_t *, struct journal_head *, int); +#endif + +/* Utility functions to maintain the revoke table */ + +/* Borrowed from buffer.c: this is a tried and tested block hash function */ +static inline int hash(journal_t *journal, unsigned long long block) +{ + struct jbd2_revoke_table_s *table = journal->j_revoke; + int hash_shift = table->hash_shift; + int hash = (int)block ^ (int)((block >> 31) >> 1); + + return ((hash << (hash_shift - 6)) ^ + (hash >> 13) ^ + (hash << (hash_shift - 12))) & (table->hash_size - 1); +} + +static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, + tid_t seq) +{ + struct list_head *hash_list; + struct jbd2_revoke_record_s *record; + +repeat: + record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS); + if (!record) + goto oom; + + record->sequence = seq; + record->blocknr = blocknr; + hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; + spin_lock(&journal->j_revoke_lock); + list_add(&record->hash, hash_list); + spin_unlock(&journal->j_revoke_lock); + return 0; + +oom: + if (!journal_oom_retry) + return -ENOMEM; + jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); + yield(); + goto repeat; +} + +/* Find a revoke record in the journal's hash table. */ + +static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, + unsigned long long blocknr) +{ + struct list_head *hash_list; + struct jbd2_revoke_record_s *record; + + hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; + + spin_lock(&journal->j_revoke_lock); + record = (struct jbd2_revoke_record_s *) hash_list->next; + while (&(record->hash) != hash_list) { + if (record->blocknr == blocknr) { + spin_unlock(&journal->j_revoke_lock); + return record; + } + record = (struct jbd2_revoke_record_s *) record->hash.next; + } + spin_unlock(&journal->j_revoke_lock); + return NULL; +} + +int __init jbd2_journal_init_revoke_caches(void) +{ + jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", + sizeof(struct jbd2_revoke_record_s), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (jbd2_revoke_record_cache == 0) + return -ENOMEM; + + jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", + sizeof(struct jbd2_revoke_table_s), + 0, 0, NULL, NULL); + if (jbd2_revoke_table_cache == 0) { + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; + return -ENOMEM; + } + return 0; +} + +void jbd2_journal_destroy_revoke_caches(void) +{ + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; + kmem_cache_destroy(jbd2_revoke_table_cache); + jbd2_revoke_table_cache = NULL; +} + +/* Initialise the revoke table for a given journal to a given size. */ + +int jbd2_journal_init_revoke(journal_t *journal, int hash_size) +{ + int shift, tmp; + + J_ASSERT (journal->j_revoke_table[0] == NULL); + + shift = 0; + tmp = hash_size; + while((tmp >>= 1UL) != 0UL) + shift++; + + journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); + if (!journal->j_revoke_table[0]) + return -ENOMEM; + journal->j_revoke = journal->j_revoke_table[0]; + + /* Check that the hash_size is a power of two */ + J_ASSERT ((hash_size & (hash_size-1)) == 0); + + journal->j_revoke->hash_size = hash_size; + + journal->j_revoke->hash_shift = shift; + + journal->j_revoke->hash_table = + kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); + if (!journal->j_revoke->hash_table) { + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); + journal->j_revoke = NULL; + return -ENOMEM; + } + + for (tmp = 0; tmp < hash_size; tmp++) + INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + + journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); + if (!journal->j_revoke_table[1]) { + kfree(journal->j_revoke_table[0]->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); + return -ENOMEM; + } + + journal->j_revoke = journal->j_revoke_table[1]; + + /* Check that the hash_size is a power of two */ + J_ASSERT ((hash_size & (hash_size-1)) == 0); + + journal->j_revoke->hash_size = hash_size; + + journal->j_revoke->hash_shift = shift; + + journal->j_revoke->hash_table = + kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); + if (!journal->j_revoke->hash_table) { + kfree(journal->j_revoke_table[0]->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]); + journal->j_revoke = NULL; + return -ENOMEM; + } + + for (tmp = 0; tmp < hash_size; tmp++) + INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + + spin_lock_init(&journal->j_revoke_lock); + + return 0; +} + +/* Destoy a journal's revoke table. The table must already be empty! */ + +void jbd2_journal_destroy_revoke(journal_t *journal) +{ + struct jbd2_revoke_table_s *table; + struct list_head *hash_list; + int i; + + table = journal->j_revoke_table[0]; + if (!table) + return; + + for (i=0; i<table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT (list_empty(hash_list)); + } + + kfree(table->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, table); + journal->j_revoke = NULL; + + table = journal->j_revoke_table[1]; + if (!table) + return; + + for (i=0; i<table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT (list_empty(hash_list)); + } + + kfree(table->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, table); + journal->j_revoke = NULL; +} + + +#ifdef __KERNEL__ + +/* + * jbd2_journal_revoke: revoke a given buffer_head from the journal. This + * prevents the block from being replayed during recovery if we take a + * crash after this current transaction commits. Any subsequent + * metadata writes of the buffer in this transaction cancel the + * revoke. + * + * Note that this call may block --- it is up to the caller to make + * sure that there are no further calls to journal_write_metadata + * before the revoke is complete. In ext3, this implies calling the + * revoke before clearing the block bitmap when we are deleting + * metadata. + * + * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a + * parameter, but does _not_ forget the buffer_head if the bh was only + * found implicitly. + * + * bh_in may not be a journalled buffer - it may have come off + * the hash tables without an attached journal_head. + * + * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count + * by one. + */ + +int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, + struct buffer_head *bh_in) +{ + struct buffer_head *bh = NULL; + journal_t *journal; + struct block_device *bdev; + int err; + + might_sleep(); + if (bh_in) + BUFFER_TRACE(bh_in, "enter"); + + journal = handle->h_transaction->t_journal; + if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){ + J_ASSERT (!"Cannot set revoke feature!"); + return -EINVAL; + } + + bdev = journal->j_fs_dev; + bh = bh_in; + + if (!bh) { + bh = __find_get_block(bdev, blocknr, journal->j_blocksize); + if (bh) + BUFFER_TRACE(bh, "found on hash"); + } +#ifdef JBD_EXPENSIVE_CHECKING + else { + struct buffer_head *bh2; + + /* If there is a different buffer_head lying around in + * memory anywhere... */ + bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); + if (bh2) { + /* ... and it has RevokeValid status... */ + if (bh2 != bh && buffer_revokevalid(bh2)) + /* ...then it better be revoked too, + * since it's illegal to create a revoke + * record against a buffer_head which is + * not marked revoked --- that would + * risk missing a subsequent revoke + * cancel. */ + J_ASSERT_BH(bh2, buffer_revoked(bh2)); + put_bh(bh2); + } + } +#endif + + /* We really ought not ever to revoke twice in a row without + first having the revoke cancelled: it's illegal to free a + block twice without allocating it in between! */ + if (bh) { + if (!J_EXPECT_BH(bh, !buffer_revoked(bh), + "inconsistent data on disk")) { + if (!bh_in) + brelse(bh); + return -EIO; + } + set_buffer_revoked(bh); + set_buffer_revokevalid(bh); + if (bh_in) { + BUFFER_TRACE(bh_in, "call jbd2_journal_forget"); + jbd2_journal_forget(handle, bh_in); + } else { + BUFFER_TRACE(bh, "call brelse"); + __brelse(bh); + } + } + + jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in); + err = insert_revoke_hash(journal, blocknr, + handle->h_transaction->t_tid); + BUFFER_TRACE(bh_in, "exit"); + return err; +} + +/* + * Cancel an outstanding revoke. For use only internally by the + * journaling code (called from jbd2_journal_get_write_access). + * + * We trust buffer_revoked() on the buffer if the buffer is already + * being journaled: if there is no revoke pending on the buffer, then we + * don't do anything here. + * + * This would break if it were possible for a buffer to be revoked and + * discarded, and then reallocated within the same transaction. In such + * a case we would have lost the revoked bit, but when we arrived here + * the second time we would still have a pending revoke to cancel. So, + * do not trust the Revoked bit on buffers unless RevokeValid is also + * set. + * + * The caller must have the journal locked. + */ +int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) +{ + struct jbd2_revoke_record_s *record; + journal_t *journal = handle->h_transaction->t_journal; + int need_cancel; + int did_revoke = 0; /* akpm: debug */ + struct buffer_head *bh = jh2bh(jh); + + jbd_debug(4, "journal_head %p, cancelling revoke\n", jh); + + /* Is the existing Revoke bit valid? If so, we trust it, and + * only perform the full cancel if the revoke bit is set. If + * not, we can't trust the revoke bit, and we need to do the + * full search for a revoke record. */ + if (test_set_buffer_revokevalid(bh)) { + need_cancel = test_clear_buffer_revoked(bh); + } else { + need_cancel = 1; + clear_buffer_revoked(bh); + } + + if (need_cancel) { + record = find_revoke_record(journal, bh->b_blocknr); + if (record) { + jbd_debug(4, "cancelled existing revoke on " + "blocknr %llu\n", (unsigned long long)bh->b_blocknr); + spin_lock(&journal->j_revoke_lock); + list_del(&record->hash); + spin_unlock(&journal->j_revoke_lock); + kmem_cache_free(jbd2_revoke_record_cache, record); + did_revoke = 1; + } + } + +#ifdef JBD_EXPENSIVE_CHECKING + /* There better not be one left behind by now! */ + record = find_revoke_record(journal, bh->b_blocknr); + J_ASSERT_JH(jh, record == NULL); +#endif + + /* Finally, have we just cleared revoke on an unhashed + * buffer_head? If so, we'd better make sure we clear the + * revoked status on any hashed alias too, otherwise the revoke + * state machine will get very upset later on. */ + if (need_cancel) { + struct buffer_head *bh2; + bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); + if (bh2) { + if (bh2 != bh) + clear_buffer_revoked(bh2); + __brelse(bh2); + } + } + return did_revoke; +} + +/* journal_switch_revoke table select j_revoke for next transaction + * we do not want to suspend any processing until all revokes are + * written -bzzz + */ +void jbd2_journal_switch_revoke_table(journal_t *journal) +{ + int i; + + if (journal->j_revoke == journal->j_revoke_table[0]) + journal->j_revoke = journal->j_revoke_table[1]; + else + journal->j_revoke = journal->j_revoke_table[0]; + + for (i = 0; i < journal->j_revoke->hash_size; i++) + INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); +} + +/* + * Write revoke records to the journal for all entries in the current + * revoke hash, deleting the entries as we go. + * + * Called with the journal lock held. + */ + +void jbd2_journal_write_revoke_records(journal_t *journal, + transaction_t *transaction) +{ + struct journal_head *descriptor; + struct jbd2_revoke_record_s *record; + struct jbd2_revoke_table_s *revoke; + struct list_head *hash_list; + int i, offset, count; + + descriptor = NULL; + offset = 0; + count = 0; + + /* select revoke table for committing transaction */ + revoke = journal->j_revoke == journal->j_revoke_table[0] ? + journal->j_revoke_table[1] : journal->j_revoke_table[0]; + + for (i = 0; i < revoke->hash_size; i++) { + hash_list = &revoke->hash_table[i]; + + while (!list_empty(hash_list)) { + record = (struct jbd2_revoke_record_s *) + hash_list->next; + write_one_revoke_record(journal, transaction, + &descriptor, &offset, + record); + count++; + list_del(&record->hash); + kmem_cache_free(jbd2_revoke_record_cache, record); + } + } + if (descriptor) + flush_descriptor(journal, descriptor, offset); + jbd_debug(1, "Wrote %d revoke records\n", count); +} + +/* + * Write out one revoke record. We need to create a new descriptor + * block if the old one is full or if we have not already created one. + */ + +static void write_one_revoke_record(journal_t *journal, + transaction_t *transaction, + struct journal_head **descriptorp, + int *offsetp, + struct jbd2_revoke_record_s *record) +{ + struct journal_head *descriptor; + int offset; + journal_header_t *header; + + /* If we are already aborting, this all becomes a noop. We + still need to go round the loop in + jbd2_journal_write_revoke_records in order to free all of the + revoke records: only the IO to the journal is omitted. */ + if (is_journal_aborted(journal)) + return; + + descriptor = *descriptorp; + offset = *offsetp; + + /* Make sure we have a descriptor with space left for the record */ + if (descriptor) { + if (offset == journal->j_blocksize) { + flush_descriptor(journal, descriptor, offset); + descriptor = NULL; + } + } + + if (!descriptor) { + descriptor = jbd2_journal_get_descriptor_buffer(journal); + if (!descriptor) + return; + header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; + header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); + header->h_sequence = cpu_to_be32(transaction->t_tid); + + /* Record it so that we can wait for IO completion later */ + JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); + jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); + + offset = sizeof(jbd2_journal_revoke_header_t); + *descriptorp = descriptor; + } + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { + * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = + cpu_to_be64(record->blocknr); + offset += 8; + + } else { + * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + cpu_to_be32(record->blocknr); + offset += 4; + } + + *offsetp = offset; +} + +/* + * Flush a revoke descriptor out to the journal. If we are aborting, + * this is a noop; otherwise we are generating a buffer which needs to + * be waited for during commit, so it has to go onto the appropriate + * journal buffer list. + */ + +static void flush_descriptor(journal_t *journal, + struct journal_head *descriptor, + int offset) +{ + jbd2_journal_revoke_header_t *header; + struct buffer_head *bh = jh2bh(descriptor); + + if (is_journal_aborted(journal)) { + put_bh(bh); + return; + } + + header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; + header->r_count = cpu_to_be32(offset); + set_buffer_jwrite(bh); + BUFFER_TRACE(bh, "write"); + set_buffer_dirty(bh); + ll_rw_block(SWRITE, 1, &bh); +} +#endif + +/* + * Revoke support for recovery. + * + * Recovery needs to be able to: + * + * record all revoke records, including the tid of the latest instance + * of each revoke in the journal + * + * check whether a given block in a given transaction should be replayed + * (ie. has not been revoked by a revoke record in that or a subsequent + * transaction) + * + * empty the revoke table after recovery. + */ + +/* + * First, setting revoke records. We create a new revoke record for + * every block ever revoked in the log as we scan it for recovery, and + * we update the existing records if we find multiple revokes for a + * single block. + */ + +int jbd2_journal_set_revoke(journal_t *journal, + unsigned long long blocknr, + tid_t sequence) +{ + struct jbd2_revoke_record_s *record; + + record = find_revoke_record(journal, blocknr); + if (record) { + /* If we have multiple occurrences, only record the + * latest sequence number in the hashed record */ + if (tid_gt(sequence, record->sequence)) + record->sequence = sequence; + return 0; + } + return insert_revoke_hash(journal, blocknr, sequence); +} + +/* + * Test revoke records. For a given block referenced in the log, has + * that block been revoked? A revoke record with a given transaction + * sequence number revokes all blocks in that transaction and earlier + * ones, but later transactions still need replayed. + */ + +int jbd2_journal_test_revoke(journal_t *journal, + unsigned long long blocknr, + tid_t sequence) +{ + struct jbd2_revoke_record_s *record; + + record = find_revoke_record(journal, blocknr); + if (!record) + return 0; + if (tid_gt(sequence, record->sequence)) + return 0; + return 1; +} + +/* + * Finally, once recovery is over, we need to clear the revoke table so + * that it can be reused by the running filesystem. + */ + +void jbd2_journal_clear_revoke(journal_t *journal) +{ + int i; + struct list_head *hash_list; + struct jbd2_revoke_record_s *record; + struct jbd2_revoke_table_s *revoke; + + revoke = journal->j_revoke; + + for (i = 0; i < revoke->hash_size; i++) { + hash_list = &revoke->hash_table[i]; + while (!list_empty(hash_list)) { + record = (struct jbd2_revoke_record_s*) hash_list->next; + list_del(&record->hash); + kmem_cache_free(jbd2_revoke_record_cache, record); + } + } +} diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c new file mode 100644 index 000000000000..149957bef907 --- /dev/null +++ b/fs/jbd2/transaction.c @@ -0,0 +1,2080 @@ +/* + * linux/fs/transaction.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 + * + * Copyright 1998 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Generic filesystem transaction handling code; part of the ext2fs + * journaling system. + * + * This file manages transactions (compound commits managed by the + * journaling code) and handles (individual atomic operations by the + * filesystem). + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/smp_lock.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +/* + * jbd2_get_transaction: obtain a new transaction_t object. + * + * Simply allocate and initialise a new transaction. Create it in + * RUNNING state and add it to the current journal (which should not + * have an existing running transaction: we only make a new transaction + * once we have started to commit the old one). + * + * Preconditions: + * The journal MUST be locked. We don't perform atomic mallocs on the + * new transaction and we can't block without protecting against other + * processes trying to touch the journal while it is in transition. + * + * Called under j_state_lock + */ + +static transaction_t * +jbd2_get_transaction(journal_t *journal, transaction_t *transaction) +{ + transaction->t_journal = journal; + transaction->t_state = T_RUNNING; + transaction->t_tid = journal->j_transaction_sequence++; + transaction->t_expires = jiffies + journal->j_commit_interval; + spin_lock_init(&transaction->t_handle_lock); + + /* Set up the commit timer for the new transaction. */ + journal->j_commit_timer.expires = transaction->t_expires; + add_timer(&journal->j_commit_timer); + + J_ASSERT(journal->j_running_transaction == NULL); + journal->j_running_transaction = transaction; + + return transaction; +} + +/* + * Handle management. + * + * A handle_t is an object which represents a single atomic update to a + * filesystem, and which tracks all of the modifications which form part + * of that one update. + */ + +/* + * start_this_handle: Given a handle, deal with any locking or stalling + * needed to make sure that there is enough journal space for the handle + * to begin. Attach the handle to a transaction and set up the + * transaction's buffer credits. + */ + +static int start_this_handle(journal_t *journal, handle_t *handle) +{ + transaction_t *transaction; + int needed; + int nblocks = handle->h_buffer_credits; + transaction_t *new_transaction = NULL; + int ret = 0; + + if (nblocks > journal->j_max_transaction_buffers) { + printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", + current->comm, nblocks, + journal->j_max_transaction_buffers); + ret = -ENOSPC; + goto out; + } + +alloc_transaction: + if (!journal->j_running_transaction) { + new_transaction = jbd_kmalloc(sizeof(*new_transaction), + GFP_NOFS); + if (!new_transaction) { + ret = -ENOMEM; + goto out; + } + memset(new_transaction, 0, sizeof(*new_transaction)); + } + + jbd_debug(3, "New handle %p going live.\n", handle); + +repeat: + + /* + * We need to hold j_state_lock until t_updates has been incremented, + * for proper journal barrier handling + */ + spin_lock(&journal->j_state_lock); +repeat_locked: + if (is_journal_aborted(journal) || + (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { + spin_unlock(&journal->j_state_lock); + ret = -EROFS; + goto out; + } + + /* Wait on the journal's transaction barrier if necessary */ + if (journal->j_barrier_count) { + spin_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_transaction_locked, + journal->j_barrier_count == 0); + goto repeat; + } + + if (!journal->j_running_transaction) { + if (!new_transaction) { + spin_unlock(&journal->j_state_lock); + goto alloc_transaction; + } + jbd2_get_transaction(journal, new_transaction); + new_transaction = NULL; + } + + transaction = journal->j_running_transaction; + + /* + * If the current transaction is locked down for commit, wait for the + * lock to be released. + */ + if (transaction->t_state == T_LOCKED) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_wait_transaction_locked, + &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_wait_transaction_locked, &wait); + goto repeat; + } + + /* + * If there is not enough space left in the log to write all potential + * buffers requested by this operation, we need to stall pending a log + * checkpoint to free some more log space. + */ + spin_lock(&transaction->t_handle_lock); + needed = transaction->t_outstanding_credits + nblocks; + + if (needed > journal->j_max_transaction_buffers) { + /* + * If the current transaction is already too large, then start + * to commit it: we can then go back and attach this handle to + * a new transaction. + */ + DEFINE_WAIT(wait); + + jbd_debug(2, "Handle %p starting new commit...\n", handle); + spin_unlock(&transaction->t_handle_lock); + prepare_to_wait(&journal->j_wait_transaction_locked, &wait, + TASK_UNINTERRUPTIBLE); + __jbd2_log_start_commit(journal, transaction->t_tid); + spin_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_wait_transaction_locked, &wait); + goto repeat; + } + + /* + * The commit code assumes that it can get enough log space + * without forcing a checkpoint. This is *critical* for + * correctness: a checkpoint of a buffer which is also + * associated with a committing transaction creates a deadlock, + * so commit simply cannot force through checkpoints. + * + * We must therefore ensure the necessary space in the journal + * *before* starting to dirty potentially checkpointed buffers + * in the new transaction. + * + * The worst part is, any transaction currently committing can + * reduce the free space arbitrarily. Be careful to account for + * those buffers when checkpointing. + */ + + /* + * @@@ AKPM: This seems rather over-defensive. We're giving commit + * a _lot_ of headroom: 1/4 of the journal plus the size of + * the committing transaction. Really, we only need to give it + * committing_transaction->t_outstanding_credits plus "enough" for + * the log control blocks. + * Also, this test is inconsitent with the matching one in + * jbd2_journal_extend(). + */ + if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { + jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); + spin_unlock(&transaction->t_handle_lock); + __jbd2_log_wait_for_space(journal); + goto repeat_locked; + } + + /* OK, account for the buffers that this operation expects to + * use and add the handle to the running transaction. */ + + handle->h_transaction = transaction; + transaction->t_outstanding_credits += nblocks; + transaction->t_updates++; + transaction->t_handle_count++; + jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", + handle, nblocks, transaction->t_outstanding_credits, + __jbd2_log_space_left(journal)); + spin_unlock(&transaction->t_handle_lock); + spin_unlock(&journal->j_state_lock); +out: + if (unlikely(new_transaction)) /* It's usually NULL */ + kfree(new_transaction); + return ret; +} + +/* Allocate a new handle. This should probably be in a slab... */ +static handle_t *new_handle(int nblocks) +{ + handle_t *handle = jbd_alloc_handle(GFP_NOFS); + if (!handle) + return NULL; + memset(handle, 0, sizeof(*handle)); + handle->h_buffer_credits = nblocks; + handle->h_ref = 1; + + return handle; +} + +/** + * handle_t *jbd2_journal_start() - Obtain a new handle. + * @journal: Journal to start transaction on. + * @nblocks: number of block buffer we might modify + * + * We make sure that the transaction can guarantee at least nblocks of + * modified buffers in the log. We block until the log can guarantee + * that much space. + * + * This function is visible to journal users (like ext3fs), so is not + * called with the journal already locked. + * + * Return a pointer to a newly allocated handle, or NULL on failure + */ +handle_t *jbd2_journal_start(journal_t *journal, int nblocks) +{ + handle_t *handle = journal_current_handle(); + int err; + + if (!journal) + return ERR_PTR(-EROFS); + + if (handle) { + J_ASSERT(handle->h_transaction->t_journal == journal); + handle->h_ref++; + return handle; + } + + handle = new_handle(nblocks); + if (!handle) + return ERR_PTR(-ENOMEM); + + current->journal_info = handle; + + err = start_this_handle(journal, handle); + if (err < 0) { + jbd_free_handle(handle); + current->journal_info = NULL; + handle = ERR_PTR(err); + } + return handle; +} + +/** + * int jbd2_journal_extend() - extend buffer credits. + * @handle: handle to 'extend' + * @nblocks: nr blocks to try to extend by. + * + * Some transactions, such as large extends and truncates, can be done + * atomically all at once or in several stages. The operation requests + * a credit for a number of buffer modications in advance, but can + * extend its credit if it needs more. + * + * jbd2_journal_extend tries to give the running handle more buffer credits. + * It does not guarantee that allocation - this is a best-effort only. + * The calling process MUST be able to deal cleanly with a failure to + * extend here. + * + * Return 0 on success, non-zero on failure. + * + * return code < 0 implies an error + * return code > 0 implies normal transaction-full status. + */ +int jbd2_journal_extend(handle_t *handle, int nblocks) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + int result; + int wanted; + + result = -EIO; + if (is_handle_aborted(handle)) + goto out; + + result = 1; + + spin_lock(&journal->j_state_lock); + + /* Don't extend a locked-down transaction! */ + if (handle->h_transaction->t_state != T_RUNNING) { + jbd_debug(3, "denied handle %p %d blocks: " + "transaction not running\n", handle, nblocks); + goto error_out; + } + + spin_lock(&transaction->t_handle_lock); + wanted = transaction->t_outstanding_credits + nblocks; + + if (wanted > journal->j_max_transaction_buffers) { + jbd_debug(3, "denied handle %p %d blocks: " + "transaction too large\n", handle, nblocks); + goto unlock; + } + + if (wanted > __jbd2_log_space_left(journal)) { + jbd_debug(3, "denied handle %p %d blocks: " + "insufficient log space\n", handle, nblocks); + goto unlock; + } + + handle->h_buffer_credits += nblocks; + transaction->t_outstanding_credits += nblocks; + result = 0; + + jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); +unlock: + spin_unlock(&transaction->t_handle_lock); +error_out: + spin_unlock(&journal->j_state_lock); +out: + return result; +} + + +/** + * int jbd2_journal_restart() - restart a handle . + * @handle: handle to restart + * @nblocks: nr credits requested + * + * Restart a handle for a multi-transaction filesystem + * operation. + * + * If the jbd2_journal_extend() call above fails to grant new buffer credits + * to a running handle, a call to jbd2_journal_restart will commit the + * handle's transaction so far and reattach the handle to a new + * transaction capabable of guaranteeing the requested number of + * credits. + */ + +int jbd2_journal_restart(handle_t *handle, int nblocks) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + int ret; + + /* If we've had an abort of any type, don't even think about + * actually doing the restart! */ + if (is_handle_aborted(handle)) + return 0; + + /* + * First unlink the handle from its current transaction, and start the + * commit on that. + */ + J_ASSERT(transaction->t_updates > 0); + J_ASSERT(journal_current_handle() == handle); + + spin_lock(&journal->j_state_lock); + spin_lock(&transaction->t_handle_lock); + transaction->t_outstanding_credits -= handle->h_buffer_credits; + transaction->t_updates--; + + if (!transaction->t_updates) + wake_up(&journal->j_wait_updates); + spin_unlock(&transaction->t_handle_lock); + + jbd_debug(2, "restarting handle %p\n", handle); + __jbd2_log_start_commit(journal, transaction->t_tid); + spin_unlock(&journal->j_state_lock); + + handle->h_buffer_credits = nblocks; + ret = start_this_handle(journal, handle); + return ret; +} + + +/** + * void jbd2_journal_lock_updates () - establish a transaction barrier. + * @journal: Journal to establish a barrier on. + * + * This locks out any further updates from being started, and blocks + * until all existing updates have completed, returning only once the + * journal is in a quiescent state with no updates running. + * + * The journal lock should not be held on entry. + */ +void jbd2_journal_lock_updates(journal_t *journal) +{ + DEFINE_WAIT(wait); + + spin_lock(&journal->j_state_lock); + ++journal->j_barrier_count; + + /* Wait until there are no running updates */ + while (1) { + transaction_t *transaction = journal->j_running_transaction; + + if (!transaction) + break; + + spin_lock(&transaction->t_handle_lock); + if (!transaction->t_updates) { + spin_unlock(&transaction->t_handle_lock); + break; + } + prepare_to_wait(&journal->j_wait_updates, &wait, + TASK_UNINTERRUPTIBLE); + spin_unlock(&transaction->t_handle_lock); + spin_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_wait_updates, &wait); + spin_lock(&journal->j_state_lock); + } + spin_unlock(&journal->j_state_lock); + + /* + * We have now established a barrier against other normal updates, but + * we also need to barrier against other jbd2_journal_lock_updates() calls + * to make sure that we serialise special journal-locked operations + * too. + */ + mutex_lock(&journal->j_barrier); +} + +/** + * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier + * @journal: Journal to release the barrier on. + * + * Release a transaction barrier obtained with jbd2_journal_lock_updates(). + * + * Should be called without the journal lock held. + */ +void jbd2_journal_unlock_updates (journal_t *journal) +{ + J_ASSERT(journal->j_barrier_count != 0); + + mutex_unlock(&journal->j_barrier); + spin_lock(&journal->j_state_lock); + --journal->j_barrier_count; + spin_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_transaction_locked); +} + +/* + * Report any unexpected dirty buffers which turn up. Normally those + * indicate an error, but they can occur if the user is running (say) + * tune2fs to modify the live filesystem, so we need the option of + * continuing as gracefully as possible. # + * + * The caller should already hold the journal lock and + * j_list_lock spinlock: most callers will need those anyway + * in order to probe the buffer's journaling state safely. + */ +static void jbd_unexpected_dirty_buffer(struct journal_head *jh) +{ + int jlist; + + /* If this buffer is one which might reasonably be dirty + * --- ie. data, or not part of this journal --- then + * we're OK to leave it alone, but otherwise we need to + * move the dirty bit to the journal's own internal + * JBDDirty bit. */ + jlist = jh->b_jlist; + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + struct buffer_head *bh = jh2bh(jh); + + if (test_clear_buffer_dirty(bh)) + set_buffer_jbddirty(bh); + } +} + +/* + * If the buffer is already part of the current transaction, then there + * is nothing we need to do. If it is already part of a prior + * transaction which we are still committing to disk, then we need to + * make sure that we do not overwrite the old copy: we do copy-out to + * preserve the copy going to disk. We also account the buffer against + * the handle's metadata buffer credits (unless the buffer is already + * part of the transaction, that is). + * + */ +static int +do_get_write_access(handle_t *handle, struct journal_head *jh, + int force_copy) +{ + struct buffer_head *bh; + transaction_t *transaction; + journal_t *journal; + int error; + char *frozen_buffer = NULL; + int need_copy = 0; + + if (is_handle_aborted(handle)) + return -EROFS; + + transaction = handle->h_transaction; + journal = transaction->t_journal; + + jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); + + JBUFFER_TRACE(jh, "entry"); +repeat: + bh = jh2bh(jh); + + /* @@@ Need to check for errors here at some point. */ + + lock_buffer(bh); + jbd_lock_bh_state(bh); + + /* We now hold the buffer lock so it is safe to query the buffer + * state. Is the buffer dirty? + * + * If so, there are two possibilities. The buffer may be + * non-journaled, and undergoing a quite legitimate writeback. + * Otherwise, it is journaled, and we don't expect dirty buffers + * in that state (the buffers should be marked JBD_Dirty + * instead.) So either the IO is being done under our own + * control and this is a bug, or it's a third party IO such as + * dump(8) (which may leave the buffer scheduled for read --- + * ie. locked but not dirty) or tune2fs (which may actually have + * the buffer dirtied, ugh.) */ + + if (buffer_dirty(bh)) { + /* + * First question: is this buffer already part of the current + * transaction or the existing committing transaction? + */ + if (jh->b_transaction) { + J_ASSERT_JH(jh, + jh->b_transaction == transaction || + jh->b_transaction == + journal->j_committing_transaction); + if (jh->b_next_transaction) + J_ASSERT_JH(jh, jh->b_next_transaction == + transaction); + } + /* + * In any case we need to clean the dirty flag and we must + * do it under the buffer lock to be sure we don't race + * with running write-out. + */ + JBUFFER_TRACE(jh, "Unexpected dirty buffer"); + jbd_unexpected_dirty_buffer(jh); + } + + unlock_buffer(bh); + + error = -EROFS; + if (is_handle_aborted(handle)) { + jbd_unlock_bh_state(bh); + goto out; + } + error = 0; + + /* + * The buffer is already part of this transaction if b_transaction or + * b_next_transaction points to it + */ + if (jh->b_transaction == transaction || + jh->b_next_transaction == transaction) + goto done; + + /* + * If there is already a copy-out version of this buffer, then we don't + * need to make another one + */ + if (jh->b_frozen_data) { + JBUFFER_TRACE(jh, "has frozen data"); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + jh->b_next_transaction = transaction; + goto done; + } + + /* Is there data here we need to preserve? */ + + if (jh->b_transaction && jh->b_transaction != transaction) { + JBUFFER_TRACE(jh, "owned by older transaction"); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_committing_transaction); + + /* There is one case we have to be very careful about. + * If the committing transaction is currently writing + * this buffer out to disk and has NOT made a copy-out, + * then we cannot modify the buffer contents at all + * right now. The essence of copy-out is that it is the + * extra copy, not the primary copy, which gets + * journaled. If the primary copy is already going to + * disk then we cannot do copy-out here. */ + + if (jh->b_jlist == BJ_Shadow) { + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); + + JBUFFER_TRACE(jh, "on shadow: sleep"); + jbd_unlock_bh_state(bh); + /* commit wakes up all shadow buffers after IO */ + for ( ; ; ) { + prepare_to_wait(wqh, &wait.wait, + TASK_UNINTERRUPTIBLE); + if (jh->b_jlist != BJ_Shadow) + break; + schedule(); + } + finish_wait(wqh, &wait.wait); + goto repeat; + } + + /* Only do the copy if the currently-owning transaction + * still needs it. If it is on the Forget list, the + * committing transaction is past that stage. The + * buffer had better remain locked during the kmalloc, + * but that should be true --- we hold the journal lock + * still and the buffer is already on the BUF_JOURNAL + * list so won't be flushed. + * + * Subtle point, though: if this is a get_undo_access, + * then we will be relying on the frozen_data to contain + * the new value of the committed_data record after the + * transaction, so we HAVE to force the frozen_data copy + * in that case. */ + + if (jh->b_jlist != BJ_Forget || force_copy) { + JBUFFER_TRACE(jh, "generate frozen data"); + if (!frozen_buffer) { + JBUFFER_TRACE(jh, "allocate memory for buffer"); + jbd_unlock_bh_state(bh); + frozen_buffer = + jbd2_slab_alloc(jh2bh(jh)->b_size, + GFP_NOFS); + if (!frozen_buffer) { + printk(KERN_EMERG + "%s: OOM for frozen_buffer\n", + __FUNCTION__); + JBUFFER_TRACE(jh, "oom!"); + error = -ENOMEM; + jbd_lock_bh_state(bh); + goto done; + } + goto repeat; + } + jh->b_frozen_data = frozen_buffer; + frozen_buffer = NULL; + need_copy = 1; + } + jh->b_next_transaction = transaction; + } + + + /* + * Finally, if the buffer is not journaled right now, we need to make + * sure it doesn't get written to disk before the caller actually + * commits the new data + */ + if (!jh->b_transaction) { + JBUFFER_TRACE(jh, "no transaction"); + J_ASSERT_JH(jh, !jh->b_next_transaction); + jh->b_transaction = transaction; + JBUFFER_TRACE(jh, "file as BJ_Reserved"); + spin_lock(&journal->j_list_lock); + __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); + spin_unlock(&journal->j_list_lock); + } + +done: + if (need_copy) { + struct page *page; + int offset; + char *source; + + J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), + "Possible IO failure.\n"); + page = jh2bh(jh)->b_page; + offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; + source = kmap_atomic(page, KM_USER0); + memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); + kunmap_atomic(source, KM_USER0); + } + jbd_unlock_bh_state(bh); + + /* + * If we are about to journal a buffer, then any revoke pending on it is + * no longer valid + */ + jbd2_journal_cancel_revoke(handle, jh); + +out: + if (unlikely(frozen_buffer)) /* It's usually NULL */ + jbd2_slab_free(frozen_buffer, bh->b_size); + + JBUFFER_TRACE(jh, "exit"); + return error; +} + +/** + * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. + * @handle: transaction to add buffer modifications to + * @bh: bh to be used for metadata writes + * @credits: variable that will receive credits for the buffer + * + * Returns an error code or 0 on success. + * + * In full data journalling mode the buffer may be of type BJ_AsyncData, + * because we're write()ing a buffer which is also part of a shared mapping. + */ + +int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) +{ + struct journal_head *jh = jbd2_journal_add_journal_head(bh); + int rc; + + /* We do not want to get caught playing with fields which the + * log thread also manipulates. Make sure that the buffer + * completes any outstanding IO before proceeding. */ + rc = do_get_write_access(handle, jh, 0); + jbd2_journal_put_journal_head(jh); + return rc; +} + + +/* + * When the user wants to journal a newly created buffer_head + * (ie. getblk() returned a new buffer and we are going to populate it + * manually rather than reading off disk), then we need to keep the + * buffer_head locked until it has been completely filled with new + * data. In this case, we should be able to make the assertion that + * the bh is not already part of an existing transaction. + * + * The buffer should already be locked by the caller by this point. + * There is no lock ranking violation: it was a newly created, + * unlocked buffer beforehand. */ + +/** + * int jbd2_journal_get_create_access () - notify intent to use newly created bh + * @handle: transaction to new buffer to + * @bh: new buffer. + * + * Call this if you create a new bh. + */ +int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh = jbd2_journal_add_journal_head(bh); + int err; + + jbd_debug(5, "journal_head %p\n", jh); + err = -EROFS; + if (is_handle_aborted(handle)) + goto out; + err = 0; + + JBUFFER_TRACE(jh, "entry"); + /* + * The buffer may already belong to this transaction due to pre-zeroing + * in the filesystem's new_block code. It may also be on the previous, + * committing transaction's lists, but it HAS to be in Forget state in + * that case: the transaction must have deleted the buffer for it to be + * reused here. + */ + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + J_ASSERT_JH(jh, (jh->b_transaction == transaction || + jh->b_transaction == NULL || + (jh->b_transaction == journal->j_committing_transaction && + jh->b_jlist == BJ_Forget))); + + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); + + if (jh->b_transaction == NULL) { + jh->b_transaction = transaction; + JBUFFER_TRACE(jh, "file as BJ_Reserved"); + __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); + } else if (jh->b_transaction == journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "set next transaction"); + jh->b_next_transaction = transaction; + } + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + + /* + * akpm: I added this. ext3_alloc_branch can pick up new indirect + * blocks which contain freed but then revoked metadata. We need + * to cancel the revoke in case we end up freeing it yet again + * and the reallocating as data - this would cause a second revoke, + * which hits an assertion error. + */ + JBUFFER_TRACE(jh, "cancelling revoke"); + jbd2_journal_cancel_revoke(handle, jh); + jbd2_journal_put_journal_head(jh); +out: + return err; +} + +/** + * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with + * non-rewindable consequences + * @handle: transaction + * @bh: buffer to undo + * @credits: store the number of taken credits here (if not NULL) + * + * Sometimes there is a need to distinguish between metadata which has + * been committed to disk and that which has not. The ext3fs code uses + * this for freeing and allocating space, we have to make sure that we + * do not reuse freed space until the deallocation has been committed, + * since if we overwrote that space we would make the delete + * un-rewindable in case of a crash. + * + * To deal with that, jbd2_journal_get_undo_access requests write access to a + * buffer for parts of non-rewindable operations such as delete + * operations on the bitmaps. The journaling code must keep a copy of + * the buffer's contents prior to the undo_access call until such time + * as we know that the buffer has definitely been committed to disk. + * + * We never need to know which transaction the committed data is part + * of, buffers touched here are guaranteed to be dirtied later and so + * will be committed to a new transaction in due course, at which point + * we can discard the old committed data pointer. + * + * Returns error number or 0 on success. + */ +int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) +{ + int err; + struct journal_head *jh = jbd2_journal_add_journal_head(bh); + char *committed_data = NULL; + + JBUFFER_TRACE(jh, "entry"); + + /* + * Do this first --- it can drop the journal lock, so we want to + * make sure that obtaining the committed_data is done + * atomically wrt. completion of any outstanding commits. + */ + err = do_get_write_access(handle, jh, 1); + if (err) + goto out; + +repeat: + if (!jh->b_committed_data) { + committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); + if (!committed_data) { + printk(KERN_EMERG "%s: No memory for committed data\n", + __FUNCTION__); + err = -ENOMEM; + goto out; + } + } + + jbd_lock_bh_state(bh); + if (!jh->b_committed_data) { + /* Copy out the current buffer contents into the + * preserved, committed copy. */ + JBUFFER_TRACE(jh, "generate b_committed data"); + if (!committed_data) { + jbd_unlock_bh_state(bh); + goto repeat; + } + + jh->b_committed_data = committed_data; + committed_data = NULL; + memcpy(jh->b_committed_data, bh->b_data, bh->b_size); + } + jbd_unlock_bh_state(bh); +out: + jbd2_journal_put_journal_head(jh); + if (unlikely(committed_data)) + jbd2_slab_free(committed_data, bh->b_size); + return err; +} + +/** + * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which + * needs to be flushed before we can commit the + * current transaction. + * @handle: transaction + * @bh: bufferhead to mark + * + * The buffer is placed on the transaction's data list and is marked as + * belonging to the transaction. + * + * Returns error number or 0 on success. + * + * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage + * by kswapd. + */ +int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) +{ + journal_t *journal = handle->h_transaction->t_journal; + int need_brelse = 0; + struct journal_head *jh; + + if (is_handle_aborted(handle)) + return 0; + + jh = jbd2_journal_add_journal_head(bh); + JBUFFER_TRACE(jh, "entry"); + + /* + * The buffer could *already* be dirty. Writeout can start + * at any time. + */ + jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); + + /* + * What if the buffer is already part of a running transaction? + * + * There are two cases: + * 1) It is part of the current running transaction. Refile it, + * just in case we have allocated it as metadata, deallocated + * it, then reallocated it as data. + * 2) It is part of the previous, still-committing transaction. + * If all we want to do is to guarantee that the buffer will be + * written to disk before this new transaction commits, then + * being sure that the *previous* transaction has this same + * property is sufficient for us! Just leave it on its old + * transaction. + * + * In case (2), the buffer must not already exist as metadata + * --- that would violate write ordering (a transaction is free + * to write its data at any point, even before the previous + * committing transaction has committed). The caller must + * never, ever allow this to happen: there's nothing we can do + * about it in this layer. + */ + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + if (jh->b_transaction) { + JBUFFER_TRACE(jh, "has transaction"); + if (jh->b_transaction != handle->h_transaction) { + JBUFFER_TRACE(jh, "belongs to older transaction"); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_committing_transaction); + + /* @@@ IS THIS TRUE ? */ + /* + * Not any more. Scenario: someone does a write() + * in data=journal mode. The buffer's transaction has + * moved into commit. Then someone does another + * write() to the file. We do the frozen data copyout + * and set b_next_transaction to point to j_running_t. + * And while we're in that state, someone does a + * writepage() in an attempt to pageout the same area + * of the file via a shared mapping. At present that + * calls jbd2_journal_dirty_data(), and we get right here. + * It may be too late to journal the data. Simply + * falling through to the next test will suffice: the + * data will be dirty and wil be checkpointed. The + * ordering comments in the next comment block still + * apply. + */ + //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + + /* + * If we're journalling data, and this buffer was + * subject to a write(), it could be metadata, forget + * or shadow against the committing transaction. Now, + * someone has dirtied the same darn page via a mapping + * and it is being writepage()'d. + * We *could* just steal the page from commit, with some + * fancy locking there. Instead, we just skip it - + * don't tie the page's buffers to the new transaction + * at all. + * Implication: if we crash before the writepage() data + * is written into the filesystem, recovery will replay + * the write() data. + */ + if (jh->b_jlist != BJ_None && + jh->b_jlist != BJ_SyncData && + jh->b_jlist != BJ_Locked) { + JBUFFER_TRACE(jh, "Not stealing"); + goto no_journal; + } + + /* + * This buffer may be undergoing writeout in commit. We + * can't return from here and let the caller dirty it + * again because that can cause the write-out loop in + * commit to never terminate. + */ + if (buffer_dirty(bh)) { + get_bh(bh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + need_brelse = 1; + sync_dirty_buffer(bh); + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + /* The buffer may become locked again at any + time if it is redirtied */ + } + + /* journal_clean_data_list() may have got there first */ + if (jh->b_transaction != NULL) { + JBUFFER_TRACE(jh, "unfile from commit"); + __jbd2_journal_temp_unlink_buffer(jh); + /* It still points to the committing + * transaction; move it to this one so + * that the refile assert checks are + * happy. */ + jh->b_transaction = handle->h_transaction; + } + /* The buffer will be refiled below */ + + } + /* + * Special case --- the buffer might actually have been + * allocated and then immediately deallocated in the previous, + * committing transaction, so might still be left on that + * transaction's metadata lists. + */ + if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { + JBUFFER_TRACE(jh, "not on correct data list: unfile"); + J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); + __jbd2_journal_temp_unlink_buffer(jh); + jh->b_transaction = handle->h_transaction; + JBUFFER_TRACE(jh, "file as data"); + __jbd2_journal_file_buffer(jh, handle->h_transaction, + BJ_SyncData); + } + } else { + JBUFFER_TRACE(jh, "not on a transaction"); + __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); + } +no_journal: + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + if (need_brelse) { + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + } + JBUFFER_TRACE(jh, "exit"); + jbd2_journal_put_journal_head(jh); + return 0; +} + +/** + * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata + * @handle: transaction to add buffer to. + * @bh: buffer to mark + * + * mark dirty metadata which needs to be journaled as part of the current + * transaction. + * + * The buffer is placed on the transaction's metadata list and is marked + * as belonging to the transaction. + * + * Returns error number or 0 on success. + * + * Special care needs to be taken if the buffer already belongs to the + * current committing transaction (in which case we should have frozen + * data present for that commit). In that case, we don't relink the + * buffer: that only gets done when the old transaction finally + * completes its commit. + */ +int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh = bh2jh(bh); + + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); + if (is_handle_aborted(handle)) + goto out; + + jbd_lock_bh_state(bh); + + if (jh->b_modified == 0) { + /* + * This buffer's got modified and becoming part + * of the transaction. This needs to be done + * once a transaction -bzzz + */ + jh->b_modified = 1; + J_ASSERT_JH(jh, handle->h_buffer_credits > 0); + handle->h_buffer_credits--; + } + + /* + * fastpath, to avoid expensive locking. If this buffer is already + * on the running transaction's metadata list there is nothing to do. + * Nobody can take it off again because there is a handle open. + * I _think_ we're OK here with SMP barriers - a mistaken decision will + * result in this test being false, so we go in and take the locks. + */ + if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { + JBUFFER_TRACE(jh, "fastpath"); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_running_transaction); + goto out_unlock_bh; + } + + set_buffer_jbddirty(bh); + + /* + * Metadata already on the current transaction list doesn't + * need to be filed. Metadata on another transaction's list must + * be committing, and will be refiled once the commit completes: + * leave it alone for now. + */ + if (jh->b_transaction != transaction) { + JBUFFER_TRACE(jh, "already on other transaction"); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_committing_transaction); + J_ASSERT_JH(jh, jh->b_next_transaction == transaction); + /* And this case is illegal: we can't reuse another + * transaction's data buffer, ever. */ + goto out_unlock_bh; + } + + /* That test should have eliminated the following case: */ + J_ASSERT_JH(jh, jh->b_frozen_data == 0); + + JBUFFER_TRACE(jh, "file as BJ_Metadata"); + spin_lock(&journal->j_list_lock); + __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); + spin_unlock(&journal->j_list_lock); +out_unlock_bh: + jbd_unlock_bh_state(bh); +out: + JBUFFER_TRACE(jh, "exit"); + return 0; +} + +/* + * jbd2_journal_release_buffer: undo a get_write_access without any buffer + * updates, if the update decided in the end that it didn't need access. + * + */ +void +jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh) +{ + BUFFER_TRACE(bh, "entry"); +} + +/** + * void jbd2_journal_forget() - bforget() for potentially-journaled buffers. + * @handle: transaction handle + * @bh: bh to 'forget' + * + * We can only do the bforget if there are no commits pending against the + * buffer. If the buffer is dirty in the current running transaction we + * can safely unlink it. + * + * bh may not be a journalled buffer at all - it may be a non-JBD + * buffer which came off the hashtable. Check for this. + * + * Decrements bh->b_count by one. + * + * Allow this call even if the handle has aborted --- it may be part of + * the caller's cleanup after an abort. + */ +int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh; + int drop_reserve = 0; + int err = 0; + + BUFFER_TRACE(bh, "entry"); + + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + + if (!buffer_jbd(bh)) + goto not_jbd; + jh = bh2jh(bh); + + /* Critical error: attempting to delete a bitmap buffer, maybe? + * Don't do any jbd operations, and return an error. */ + if (!J_EXPECT_JH(jh, !jh->b_committed_data, + "inconsistent data on disk")) { + err = -EIO; + goto not_jbd; + } + + /* + * The buffer's going from the transaction, we must drop + * all references -bzzz + */ + jh->b_modified = 0; + + if (jh->b_transaction == handle->h_transaction) { + J_ASSERT_JH(jh, !jh->b_frozen_data); + + /* If we are forgetting a buffer which is already part + * of this transaction, then we can just drop it from + * the transaction immediately. */ + clear_buffer_dirty(bh); + clear_buffer_jbddirty(bh); + + JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); + + drop_reserve = 1; + + /* + * We are no longer going to journal this buffer. + * However, the commit of this transaction is still + * important to the buffer: the delete that we are now + * processing might obsolete an old log entry, so by + * committing, we can satisfy the buffer's checkpoint. + * + * So, if we have a checkpoint on the buffer, we should + * now refile the buffer on our BJ_Forget list so that + * we know to remove the checkpoint after we commit. + */ + + if (jh->b_cp_transaction) { + __jbd2_journal_temp_unlink_buffer(jh); + __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); + } else { + __jbd2_journal_unfile_buffer(jh); + jbd2_journal_remove_journal_head(bh); + __brelse(bh); + if (!buffer_jbd(bh)) { + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + __bforget(bh); + goto drop; + } + } + } else if (jh->b_transaction) { + J_ASSERT_JH(jh, (jh->b_transaction == + journal->j_committing_transaction)); + /* However, if the buffer is still owned by a prior + * (committing) transaction, we can't drop it yet... */ + JBUFFER_TRACE(jh, "belongs to older transaction"); + /* ... but we CAN drop it from the new transaction if we + * have also modified it since the original commit. */ + + if (jh->b_next_transaction) { + J_ASSERT(jh->b_next_transaction == transaction); + jh->b_next_transaction = NULL; + drop_reserve = 1; + } + } + +not_jbd: + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + __brelse(bh); +drop: + if (drop_reserve) { + /* no need to reserve log space for this block -bzzz */ + handle->h_buffer_credits++; + } + return err; +} + +/** + * int jbd2_journal_stop() - complete a transaction + * @handle: tranaction to complete. + * + * All done for a particular handle. + * + * There is not much action needed here. We just return any remaining + * buffer credits to the transaction and remove the handle. The only + * complication is that we need to start a commit operation if the + * filesystem is marked for synchronous update. + * + * jbd2_journal_stop itself will not usually return an error, but it may + * do so in unusual circumstances. In particular, expect it to + * return -EIO if a jbd2_journal_abort has been executed since the + * transaction began. + */ +int jbd2_journal_stop(handle_t *handle) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + int old_handle_count, err; + pid_t pid; + + J_ASSERT(transaction->t_updates > 0); + J_ASSERT(journal_current_handle() == handle); + + if (is_handle_aborted(handle)) + err = -EIO; + else + err = 0; + + if (--handle->h_ref > 0) { + jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, + handle->h_ref); + return err; + } + + jbd_debug(4, "Handle %p going down\n", handle); + + /* + * Implement synchronous transaction batching. If the handle + * was synchronous, don't force a commit immediately. Let's + * yield and let another thread piggyback onto this transaction. + * Keep doing that while new threads continue to arrive. + * It doesn't cost much - we're about to run a commit and sleep + * on IO anyway. Speeds up many-threaded, many-dir operations + * by 30x or more... + * + * But don't do this if this process was the most recent one to + * perform a synchronous write. We do this to detect the case where a + * single process is doing a stream of sync writes. No point in waiting + * for joiners in that case. + */ + pid = current->pid; + if (handle->h_sync && journal->j_last_sync_writer != pid) { + journal->j_last_sync_writer = pid; + do { + old_handle_count = transaction->t_handle_count; + schedule_timeout_uninterruptible(1); + } while (old_handle_count != transaction->t_handle_count); + } + + current->journal_info = NULL; + spin_lock(&journal->j_state_lock); + spin_lock(&transaction->t_handle_lock); + transaction->t_outstanding_credits -= handle->h_buffer_credits; + transaction->t_updates--; + if (!transaction->t_updates) { + wake_up(&journal->j_wait_updates); + if (journal->j_barrier_count) + wake_up(&journal->j_wait_transaction_locked); + } + + /* + * If the handle is marked SYNC, we need to set another commit + * going! We also want to force a commit if the current + * transaction is occupying too much of the log, or if the + * transaction is too old now. + */ + if (handle->h_sync || + transaction->t_outstanding_credits > + journal->j_max_transaction_buffers || + time_after_eq(jiffies, transaction->t_expires)) { + /* Do this even for aborted journals: an abort still + * completes the commit thread, it just doesn't write + * anything to disk. */ + tid_t tid = transaction->t_tid; + + spin_unlock(&transaction->t_handle_lock); + jbd_debug(2, "transaction too old, requesting commit for " + "handle %p\n", handle); + /* This is non-blocking */ + __jbd2_log_start_commit(journal, transaction->t_tid); + spin_unlock(&journal->j_state_lock); + + /* + * Special case: JBD2_SYNC synchronous updates require us + * to wait for the commit to complete. + */ + if (handle->h_sync && !(current->flags & PF_MEMALLOC)) + err = jbd2_log_wait_commit(journal, tid); + } else { + spin_unlock(&transaction->t_handle_lock); + spin_unlock(&journal->j_state_lock); + } + + jbd_free_handle(handle); + return err; +} + +/**int jbd2_journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * For synchronous operations: force any uncommitted transactions + * to disk. May seem kludgy, but it reuses all the handle batching + * code in a very simple manner. + */ +int jbd2_journal_force_commit(journal_t *journal) +{ + handle_t *handle; + int ret; + + handle = jbd2_journal_start(journal, 1); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + } else { + handle->h_sync = 1; + ret = jbd2_journal_stop(handle); + } + return ret; +} + +/* + * + * List management code snippets: various functions for manipulating the + * transaction buffer lists. + * + */ + +/* + * Append a buffer to a transaction list, given the transaction's list head + * pointer. + * + * j_list_lock is held. + * + * jbd_lock_bh_state(jh2bh(jh)) is held. + */ + +static inline void +__blist_add_buffer(struct journal_head **list, struct journal_head *jh) +{ + if (!*list) { + jh->b_tnext = jh->b_tprev = jh; + *list = jh; + } else { + /* Insert at the tail of the list to preserve order */ + struct journal_head *first = *list, *last = first->b_tprev; + jh->b_tprev = last; + jh->b_tnext = first; + last->b_tnext = first->b_tprev = jh; + } +} + +/* + * Remove a buffer from a transaction list, given the transaction's list + * head pointer. + * + * Called with j_list_lock held, and the journal may not be locked. + * + * jbd_lock_bh_state(jh2bh(jh)) is held. + */ + +static inline void +__blist_del_buffer(struct journal_head **list, struct journal_head *jh) +{ + if (*list == jh) { + *list = jh->b_tnext; + if (*list == jh) + *list = NULL; + } + jh->b_tprev->b_tnext = jh->b_tnext; + jh->b_tnext->b_tprev = jh->b_tprev; +} + +/* + * Remove a buffer from the appropriate transaction list. + * + * Note that this function can *change* the value of + * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, + * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller + * is holding onto a copy of one of thee pointers, it could go bad. + * Generally the caller needs to re-read the pointer from the transaction_t. + * + * Called under j_list_lock. The journal may not be locked. + */ +void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) +{ + struct journal_head **list = NULL; + transaction_t *transaction; + struct buffer_head *bh = jh2bh(jh); + + J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + transaction = jh->b_transaction; + if (transaction) + assert_spin_locked(&transaction->t_journal->j_list_lock); + + J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); + if (jh->b_jlist != BJ_None) + J_ASSERT_JH(jh, transaction != 0); + + switch (jh->b_jlist) { + case BJ_None: + return; + case BJ_SyncData: + list = &transaction->t_sync_datalist; + break; + case BJ_Metadata: + transaction->t_nr_buffers--; + J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); + list = &transaction->t_buffers; + break; + case BJ_Forget: + list = &transaction->t_forget; + break; + case BJ_IO: + list = &transaction->t_iobuf_list; + break; + case BJ_Shadow: + list = &transaction->t_shadow_list; + break; + case BJ_LogCtl: + list = &transaction->t_log_list; + break; + case BJ_Reserved: + list = &transaction->t_reserved_list; + break; + case BJ_Locked: + list = &transaction->t_locked_list; + break; + } + + __blist_del_buffer(list, jh); + jh->b_jlist = BJ_None; + if (test_clear_buffer_jbddirty(bh)) + mark_buffer_dirty(bh); /* Expose it to the VM */ +} + +void __jbd2_journal_unfile_buffer(struct journal_head *jh) +{ + __jbd2_journal_temp_unlink_buffer(jh); + jh->b_transaction = NULL; +} + +void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) +{ + jbd_lock_bh_state(jh2bh(jh)); + spin_lock(&journal->j_list_lock); + __jbd2_journal_unfile_buffer(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(jh2bh(jh)); +} + +/* + * Called from jbd2_journal_try_to_free_buffers(). + * + * Called under jbd_lock_bh_state(bh) + */ +static void +__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) +{ + struct journal_head *jh; + + jh = bh2jh(bh); + + if (buffer_locked(bh) || buffer_dirty(bh)) + goto out; + + if (jh->b_next_transaction != 0) + goto out; + + spin_lock(&journal->j_list_lock); + if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { + if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { + /* A written-back ordered data buffer */ + JBUFFER_TRACE(jh, "release data"); + __jbd2_journal_unfile_buffer(jh); + jbd2_journal_remove_journal_head(bh); + __brelse(bh); + } + } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { + /* written-back checkpointed metadata buffer */ + if (jh->b_jlist == BJ_None) { + JBUFFER_TRACE(jh, "remove from checkpoint list"); + __jbd2_journal_remove_checkpoint(jh); + jbd2_journal_remove_journal_head(bh); + __brelse(bh); + } + } + spin_unlock(&journal->j_list_lock); +out: + return; +} + + +/** + * int jbd2_journal_try_to_free_buffers() - try to free page buffers. + * @journal: journal for operation + * @page: to try and free + * @unused_gfp_mask: unused + * + * + * For all the buffers on this page, + * if they are fully written out ordered data, move them onto BUF_CLEAN + * so try_to_free_buffers() can reap them. + * + * This function returns non-zero if we wish try_to_free_buffers() + * to be called. We do this if the page is releasable by try_to_free_buffers(). + * We also do it if the page has locked or dirty buffers and the caller wants + * us to perform sync or async writeout. + * + * This complicates JBD locking somewhat. We aren't protected by the + * BKL here. We wish to remove the buffer from its committing or + * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer. + * + * This may *change* the value of transaction_t->t_datalist, so anyone + * who looks at t_datalist needs to lock against this function. + * + * Even worse, someone may be doing a jbd2_journal_dirty_data on this + * buffer. So we need to lock against that. jbd2_journal_dirty_data() + * will come out of the lock with the buffer dirty, which makes it + * ineligible for release here. + * + * Who else is affected by this? hmm... Really the only contender + * is do_get_write_access() - it could be looking at the buffer while + * journal_try_to_free_buffer() is changing its state. But that + * cannot happen because we never reallocate freed data as metadata + * while the data is part of a transaction. Yes? + */ +int jbd2_journal_try_to_free_buffers(journal_t *journal, + struct page *page, gfp_t unused_gfp_mask) +{ + struct buffer_head *head; + struct buffer_head *bh; + int ret = 0; + + J_ASSERT(PageLocked(page)); + + head = page_buffers(page); + bh = head; + do { + struct journal_head *jh; + + /* + * We take our own ref against the journal_head here to avoid + * having to add tons of locking around each instance of + * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). + */ + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) + continue; + + jbd_lock_bh_state(bh); + __journal_try_to_free_buffer(journal, bh); + jbd2_journal_put_journal_head(jh); + jbd_unlock_bh_state(bh); + if (buffer_jbd(bh)) + goto busy; + } while ((bh = bh->b_this_page) != head); + ret = try_to_free_buffers(page); +busy: + return ret; +} + +/* + * This buffer is no longer needed. If it is on an older transaction's + * checkpoint list we need to record it on this transaction's forget list + * to pin this buffer (and hence its checkpointing transaction) down until + * this transaction commits. If the buffer isn't on a checkpoint list, we + * release it. + * Returns non-zero if JBD no longer has an interest in the buffer. + * + * Called under j_list_lock. + * + * Called under jbd_lock_bh_state(bh). + */ +static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) +{ + int may_free = 1; + struct buffer_head *bh = jh2bh(jh); + + __jbd2_journal_unfile_buffer(jh); + + if (jh->b_cp_transaction) { + JBUFFER_TRACE(jh, "on running+cp transaction"); + __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); + clear_buffer_jbddirty(bh); + may_free = 0; + } else { + JBUFFER_TRACE(jh, "on running transaction"); + jbd2_journal_remove_journal_head(bh); + __brelse(bh); + } + return may_free; +} + +/* + * jbd2_journal_invalidatepage + * + * This code is tricky. It has a number of cases to deal with. + * + * There are two invariants which this code relies on: + * + * i_size must be updated on disk before we start calling invalidatepage on the + * data. + * + * This is done in ext3 by defining an ext3_setattr method which + * updates i_size before truncate gets going. By maintaining this + * invariant, we can be sure that it is safe to throw away any buffers + * attached to the current transaction: once the transaction commits, + * we know that the data will not be needed. + * + * Note however that we can *not* throw away data belonging to the + * previous, committing transaction! + * + * Any disk blocks which *are* part of the previous, committing + * transaction (and which therefore cannot be discarded immediately) are + * not going to be reused in the new running transaction + * + * The bitmap committed_data images guarantee this: any block which is + * allocated in one transaction and removed in the next will be marked + * as in-use in the committed_data bitmap, so cannot be reused until + * the next transaction to delete the block commits. This means that + * leaving committing buffers dirty is quite safe: the disk blocks + * cannot be reallocated to a different file and so buffer aliasing is + * not possible. + * + * + * The above applies mainly to ordered data mode. In writeback mode we + * don't make guarantees about the order in which data hits disk --- in + * particular we don't guarantee that new dirty data is flushed before + * transaction commit --- so it is always safe just to discard data + * immediately in that mode. --sct + */ + +/* + * The journal_unmap_buffer helper function returns zero if the buffer + * concerned remains pinned as an anonymous buffer belonging to an older + * transaction. + * + * We're outside-transaction here. Either or both of j_running_transaction + * and j_committing_transaction may be NULL. + */ +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +{ + transaction_t *transaction; + struct journal_head *jh; + int may_free = 1; + int ret; + + BUFFER_TRACE(bh, "entry"); + + /* + * It is safe to proceed here without the j_list_lock because the + * buffers cannot be stolen by try_to_free_buffers as long as we are + * holding the page lock. --sct + */ + + if (!buffer_jbd(bh)) + goto zap_buffer_unlocked; + + spin_lock(&journal->j_state_lock); + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) + goto zap_buffer_no_jh; + + transaction = jh->b_transaction; + if (transaction == NULL) { + /* First case: not on any transaction. If it + * has no checkpoint link, then we can zap it: + * it's a writeback-mode buffer so we don't care + * if it hits disk safely. */ + if (!jh->b_cp_transaction) { + JBUFFER_TRACE(jh, "not on any transaction: zap"); + goto zap_buffer; + } + + if (!buffer_dirty(bh)) { + /* bdflush has written it. We can drop it now */ + goto zap_buffer; + } + + /* OK, it must be in the journal but still not + * written fully to disk: it's metadata or + * journaled data... */ + + if (journal->j_running_transaction) { + /* ... and once the current transaction has + * committed, the buffer won't be needed any + * longer. */ + JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); + ret = __dispose_buffer(jh, + journal->j_running_transaction); + jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + return ret; + } else { + /* There is no currently-running transaction. So the + * orphan record which we wrote for this file must have + * passed into commit. We must attach this buffer to + * the committing transaction, if it exists. */ + if (journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "give to committing trans"); + ret = __dispose_buffer(jh, + journal->j_committing_transaction); + jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + return ret; + } else { + /* The orphan record's transaction has + * committed. We can cleanse this buffer */ + clear_buffer_jbddirty(bh); + goto zap_buffer; + } + } + } else if (transaction == journal->j_committing_transaction) { + if (jh->b_jlist == BJ_Locked) { + /* + * The buffer is on the committing transaction's locked + * list. We have the buffer locked, so I/O has + * completed. So we can nail the buffer now. + */ + may_free = __dispose_buffer(jh, transaction); + goto zap_buffer; + } + /* + * If it is committing, we simply cannot touch it. We + * can remove it's next_transaction pointer from the + * running transaction if that is set, but nothing + * else. */ + JBUFFER_TRACE(jh, "on committing transaction"); + set_buffer_freed(bh); + if (jh->b_next_transaction) { + J_ASSERT(jh->b_next_transaction == + journal->j_running_transaction); + jh->b_next_transaction = NULL; + } + jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + return 0; + } else { + /* Good, the buffer belongs to the running transaction. + * We are writing our own transaction's data, not any + * previous one's, so it is safe to throw it away + * (remember that we expect the filesystem to have set + * i_size already for this truncate so recovery will not + * expose the disk blocks we are discarding here.) */ + J_ASSERT_JH(jh, transaction == journal->j_running_transaction); + may_free = __dispose_buffer(jh, transaction); + } + +zap_buffer: + jbd2_journal_put_journal_head(jh); +zap_buffer_no_jh: + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); +zap_buffer_unlocked: + clear_buffer_dirty(bh); + J_ASSERT_BH(bh, !buffer_jbddirty(bh)); + clear_buffer_mapped(bh); + clear_buffer_req(bh); + clear_buffer_new(bh); + bh->b_bdev = NULL; + return may_free; +} + +/** + * void jbd2_journal_invalidatepage() + * @journal: journal to use for flush... + * @page: page to flush + * @offset: length of page to invalidate. + * + * Reap page buffers containing data after offset in page. + * + */ +void jbd2_journal_invalidatepage(journal_t *journal, + struct page *page, + unsigned long offset) +{ + struct buffer_head *head, *bh, *next; + unsigned int curr_off = 0; + int may_free = 1; + + if (!PageLocked(page)) + BUG(); + if (!page_has_buffers(page)) + return; + + /* We will potentially be playing with lists other than just the + * data lists (especially for journaled data mode), so be + * cautious in our locking. */ + + head = bh = page_buffers(page); + do { + unsigned int next_off = curr_off + bh->b_size; + next = bh->b_this_page; + + if (offset <= curr_off) { + /* This block is wholly outside the truncation point */ + lock_buffer(bh); + may_free &= journal_unmap_buffer(journal, bh); + unlock_buffer(bh); + } + curr_off = next_off; + bh = next; + + } while (bh != head); + + if (!offset) { + if (may_free && try_to_free_buffers(page)) + J_ASSERT(!page_has_buffers(page)); + } +} + +/* + * File a buffer on the given transaction list. + */ +void __jbd2_journal_file_buffer(struct journal_head *jh, + transaction_t *transaction, int jlist) +{ + struct journal_head **list = NULL; + int was_dirty = 0; + struct buffer_head *bh = jh2bh(jh); + + J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + assert_spin_locked(&transaction->t_journal->j_list_lock); + + J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); + J_ASSERT_JH(jh, jh->b_transaction == transaction || + jh->b_transaction == 0); + + if (jh->b_transaction && jh->b_jlist == jlist) + return; + + /* The following list of buffer states needs to be consistent + * with __jbd_unexpected_dirty_buffer()'s handling of dirty + * state. */ + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + if (test_clear_buffer_dirty(bh) || + test_clear_buffer_jbddirty(bh)) + was_dirty = 1; + } + + if (jh->b_transaction) + __jbd2_journal_temp_unlink_buffer(jh); + jh->b_transaction = transaction; + + switch (jlist) { + case BJ_None: + J_ASSERT_JH(jh, !jh->b_committed_data); + J_ASSERT_JH(jh, !jh->b_frozen_data); + return; + case BJ_SyncData: + list = &transaction->t_sync_datalist; + break; + case BJ_Metadata: + transaction->t_nr_buffers++; + list = &transaction->t_buffers; + break; + case BJ_Forget: + list = &transaction->t_forget; + break; + case BJ_IO: + list = &transaction->t_iobuf_list; + break; + case BJ_Shadow: + list = &transaction->t_shadow_list; + break; + case BJ_LogCtl: + list = &transaction->t_log_list; + break; + case BJ_Reserved: + list = &transaction->t_reserved_list; + break; + case BJ_Locked: + list = &transaction->t_locked_list; + break; + } + + __blist_add_buffer(list, jh); + jh->b_jlist = jlist; + + if (was_dirty) + set_buffer_jbddirty(bh); +} + +void jbd2_journal_file_buffer(struct journal_head *jh, + transaction_t *transaction, int jlist) +{ + jbd_lock_bh_state(jh2bh(jh)); + spin_lock(&transaction->t_journal->j_list_lock); + __jbd2_journal_file_buffer(jh, transaction, jlist); + spin_unlock(&transaction->t_journal->j_list_lock); + jbd_unlock_bh_state(jh2bh(jh)); +} + +/* + * Remove a buffer from its current buffer list in preparation for + * dropping it from its current transaction entirely. If the buffer has + * already started to be used by a subsequent transaction, refile the + * buffer on that transaction's metadata list. + * + * Called under journal->j_list_lock + * + * Called under jbd_lock_bh_state(jh2bh(jh)) + */ +void __jbd2_journal_refile_buffer(struct journal_head *jh) +{ + int was_dirty; + struct buffer_head *bh = jh2bh(jh); + + J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + if (jh->b_transaction) + assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); + + /* If the buffer is now unused, just drop it. */ + if (jh->b_next_transaction == NULL) { + __jbd2_journal_unfile_buffer(jh); + return; + } + + /* + * It has been modified by a later transaction: add it to the new + * transaction's metadata list. + */ + + was_dirty = test_clear_buffer_jbddirty(bh); + __jbd2_journal_temp_unlink_buffer(jh); + jh->b_transaction = jh->b_next_transaction; + jh->b_next_transaction = NULL; + __jbd2_journal_file_buffer(jh, jh->b_transaction, + was_dirty ? BJ_Metadata : BJ_Reserved); + J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); + + if (was_dirty) + set_buffer_jbddirty(bh); +} + +/* + * For the unlocked version of this call, also make sure that any + * hanging journal_head is cleaned up if necessary. + * + * __jbd2_journal_refile_buffer is usually called as part of a single locked + * operation on a buffer_head, in which the caller is probably going to + * be hooking the journal_head onto other lists. In that case it is up + * to the caller to remove the journal_head if necessary. For the + * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be + * doing anything else to the buffer so we need to do the cleanup + * ourselves to avoid a jh leak. + * + * *** The journal_head may be freed by this call! *** + */ +void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) +{ + struct buffer_head *bh = jh2bh(jh); + + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + + __jbd2_journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + jbd2_journal_remove_journal_head(bh); + + spin_unlock(&journal->j_list_lock); + __brelse(bh); +} diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 6de374513c01..bc4b8106a490 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -334,10 +334,10 @@ static int __init init_jffs2_fs(void) which means just 'no padding', without the alignment thing. But GCC doesn't have that -- we have to just hope the structs are the right sizes, instead. */ - BUG_ON(sizeof(struct jffs2_unknown_node) != 12); - BUG_ON(sizeof(struct jffs2_raw_dirent) != 40); - BUG_ON(sizeof(struct jffs2_raw_inode) != 68); - BUG_ON(sizeof(struct jffs2_raw_summary) != 32); + BUILD_BUG_ON(sizeof(struct jffs2_unknown_node) != 12); + BUILD_BUG_ON(sizeof(struct jffs2_raw_dirent) != 40); + BUILD_BUG_ON(sizeof(struct jffs2_raw_inode) != 68); + BUILD_BUG_ON(sizeof(struct jffs2_raw_summary) != 32); printk(KERN_INFO "JFFS2 version 2.2." #ifdef CONFIG_JFFS2_FS_WRITEBUFFER diff --git a/fs/minix/inode.c b/fs/minix/inode.c index c11a4b9fb863..1e36bae4d0eb 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -149,12 +149,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) return -ENOMEM; s->s_fs_info = sbi; - /* N.B. These should be compile-time tests. - Unfortunately that is impossible. */ - if (32 != sizeof (struct minix_inode)) - panic("bad V1 i-node size"); - if (64 != sizeof(struct minix2_inode)) - panic("bad V2 i-node size"); + BUILD_BUG_ON(32 != sizeof (struct minix_inode)); + BUILD_BUG_ON(64 != sizeof(struct minix2_inode)); if (!sb_set_blocksize(s, BLOCK_SIZE)) goto out_bad_hblock; diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index a89ac84a8241..589d1eac55c1 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -726,7 +726,7 @@ outrel: struct compat_ncp_privatedata_ioctl user32; user32.len = user.len; user32.data = (unsigned long) user.data; - if (copy_to_user(&user32, argp, sizeof(user32))) + if (copy_to_user(argp, &user32, sizeof(user32))) return -EFAULT; } else #endif diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 6e4e48c5092a..34c3996bd0f5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -322,25 +322,11 @@ found_client: if (new) nfs_free_client(new); - if (clp->cl_cons_state == NFS_CS_INITING) { - DECLARE_WAITQUEUE(myself, current); - - add_wait_queue(&nfs_client_active_wq, &myself); - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (signal_pending(current) || - clp->cl_cons_state > NFS_CS_READY) - break; - schedule(); - } - - remove_wait_queue(&nfs_client_active_wq, &myself); - - if (signal_pending(current)) { - nfs_put_client(clp); - return ERR_PTR(-ERESTARTSYS); - } + error = wait_event_interruptible(nfs_client_active_wq, + clp->cl_cons_state != NFS_CS_INITING); + if (error < 0) { + nfs_put_client(clp); + return ERR_PTR(-ERESTARTSYS); } if (clp->cl_cons_state < NFS_CS_READY) { diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6fa6340a5fb8..013b38996e64 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -217,7 +217,7 @@ int nfsd_create_serv(void) atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, - NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize, + nfsd_max_blksize, nfsd_last_thread, nfsd, SIG_NOCLEAN, THIS_MODULE); if (nfsd_serv == NULL) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 4c29cd7cc8e6..76b46ebbb10c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -339,7 +339,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int blockshift) #if BITS_PER_LONG == 32 # if defined(CONFIG_LBD) - BUG_ON(sizeof(sector_t) != 8); + BUILD_BUG_ON(sizeof(sector_t) != 8); pagefactor = PAGE_CACHE_SIZE; bitshift = BITS_PER_LONG; # else diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 4f8df71e49d3..8c7af1777819 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -32,13 +32,11 @@ #include <asm/unaligned.h> #define SYS_IND(p) (get_unaligned(&p->sys_ind)) -#define NR_SECTS(p) ({ __typeof__(p->nr_sects) __a = \ - get_unaligned(&p->nr_sects); \ +#define NR_SECTS(p) ({ __le32 __a = get_unaligned(&p->nr_sects); \ le32_to_cpu(__a); \ }) -#define START_SECT(p) ({ __typeof__(p->start_sect) __a = \ - get_unaligned(&p->start_sect); \ +#define START_SECT(p) ({ __le32 __a = get_unaligned(&p->start_sect); \ le32_to_cpu(__a); \ }) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 1bfae42117ca..e3d466a228d4 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1304,8 +1304,8 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, bh = sb_bread(sb, block); if (bh == NULL) - reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%lu) " - "reading failed", __FUNCTION__, bh->b_blocknr); + reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " + "reading failed", __FUNCTION__, block); else { if (buffer_locked(bh)) { PROC_INFO_INC(sb, scan_bitmap.wait); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c89aa2338191..9041802df832 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -430,20 +430,29 @@ int remove_save_link(struct inode *inode, int truncate) return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); } -static void reiserfs_put_super(struct super_block *s) +static void reiserfs_kill_sb(struct super_block *s) { - struct reiserfs_transaction_handle th; - th.t_trans_id = 0; + if (REISERFS_SB(s)) { + if (REISERFS_SB(s)->xattr_root) { + d_invalidate(REISERFS_SB(s)->xattr_root); + dput(REISERFS_SB(s)->xattr_root); + REISERFS_SB(s)->xattr_root = NULL; + } - if (REISERFS_SB(s)->xattr_root) { - d_invalidate(REISERFS_SB(s)->xattr_root); - dput(REISERFS_SB(s)->xattr_root); + if (REISERFS_SB(s)->priv_root) { + d_invalidate(REISERFS_SB(s)->priv_root); + dput(REISERFS_SB(s)->priv_root); + REISERFS_SB(s)->priv_root = NULL; + } } - if (REISERFS_SB(s)->priv_root) { - d_invalidate(REISERFS_SB(s)->priv_root); - dput(REISERFS_SB(s)->priv_root); - } + kill_block_super(s); +} + +static void reiserfs_put_super(struct super_block *s) +{ + struct reiserfs_transaction_handle th; + th.t_trans_id = 0; /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { @@ -2156,7 +2165,7 @@ struct file_system_type reiserfs_fs_type = { .owner = THIS_MODULE, .name = "reiserfs", .get_sb = get_super_block, - .kill_sb = kill_block_super, + .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/splice.c b/fs/splice.c index 13e92dd19fbb..a567010b62ac 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -607,7 +607,7 @@ find_page: ret = -ENOMEM; page = page_cache_alloc_cold(mapping); if (unlikely(!page)) - goto out_nomem; + goto out_ret; /* * This will also lock the page @@ -666,7 +666,7 @@ find_page: if (sd->pos + this_len > isize) vmtruncate(mapping->host, isize); - goto out; + goto out_ret; } if (buf->page != page) { @@ -698,7 +698,7 @@ find_page: out: page_cache_release(page); unlock_page(page); -out_nomem: +out_ret: return ret; } diff --git a/fs/super.c b/fs/super.c index aec99ddbe53f..47e554c12e76 100644 --- a/fs/super.c +++ b/fs/super.c @@ -260,17 +260,17 @@ int fsync_super(struct super_block *sb) * that need destruction out of superblock, call generic_shutdown_super() * and release aforementioned objects. Note: dentries and inodes _are_ * taken care of and do not need specific handling. + * + * Upon calling this function, the filesystem may no longer alter or + * rearrange the set of dentries belonging to this super_block, nor may it + * change the attachments of dentries to inodes. */ void generic_shutdown_super(struct super_block *sb) { - struct dentry *root = sb->s_root; struct super_operations *sop = sb->s_op; - if (root) { - sb->s_root = NULL; - shrink_dcache_parent(root); - shrink_dcache_sb(sb); - dput(root); + if (sb->s_root) { + shrink_dcache_for_umount(sb); fsync_super(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 350cba5d6803..dc9e7dc07fb7 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -358,16 +358,11 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent) unsigned long blocknr; int size = 0, i; - if (1024 != sizeof (struct xenix_super_block)) - panic("Xenix FS: bad superblock size"); - if (512 != sizeof (struct sysv4_super_block)) - panic("SystemV FS: bad superblock size"); - if (512 != sizeof (struct sysv2_super_block)) - panic("SystemV FS: bad superblock size"); - if (500 != sizeof (struct coh_super_block)) - panic("Coherent FS: bad superblock size"); - if (64 != sizeof (struct sysv_inode)) - panic("sysv fs: bad inode size"); + BUILD_BUG_ON(1024 != sizeof (struct xenix_super_block)); + BUILD_BUG_ON(512 != sizeof (struct sysv4_super_block)); + BUILD_BUG_ON(512 != sizeof (struct sysv2_super_block)); + BUILD_BUG_ON(500 != sizeof (struct coh_super_block)); + BUILD_BUG_ON(64 != sizeof (struct sysv_inode)); sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL); if (!sbi) diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 22f820a9b15c..17437574f79c 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -184,14 +184,13 @@ void _ubh_memcpyubh_(struct ufs_sb_private_info * uspi, dev_t ufs_get_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi) { - __fs32 fs32; + __u32 fs32; dev_t dev; if ((UFS_SB(sb)->s_flags & UFS_ST_MASK) == UFS_ST_SUNx86) - fs32 = ufsi->i_u1.i_data[1]; + fs32 = fs32_to_cpu(sb, ufsi->i_u1.i_data[1]); else - fs32 = ufsi->i_u1.i_data[0]; - fs32 = fs32_to_cpu(sb, fs32); + fs32 = fs32_to_cpu(sb, ufsi->i_u1.i_data[0]); switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUNx86: case UFS_ST_SUN: @@ -212,7 +211,7 @@ ufs_get_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi) void ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev) { - __fs32 fs32; + __u32 fs32; switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { case UFS_ST_SUNx86: @@ -227,11 +226,10 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev fs32 = old_encode_dev(dev); break; } - fs32 = cpu_to_fs32(sb, fs32); if ((UFS_SB(sb)->s_flags & UFS_ST_MASK) == UFS_ST_SUNx86) - ufsi->i_u1.i_data[1] = fs32; + ufsi->i_u1.i_data[1] = cpu_to_fs32(sb, fs32); else - ufsi->i_u1.i_data[0] = fs32; + ufsi->i_u1.i_data[0] = cpu_to_fs32(sb, fs32); } /** diff --git a/fs/xattr.c b/fs/xattr.c index c32f15b5f60f..395635100f77 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -135,6 +135,26 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) } EXPORT_SYMBOL_GPL(vfs_getxattr); +ssize_t +vfs_listxattr(struct dentry *d, char *list, size_t size) +{ + ssize_t error; + + error = security_inode_listxattr(d); + if (error) + return error; + error = -EOPNOTSUPP; + if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { + error = d->d_inode->i_op->listxattr(d, list, size); + } else { + error = security_inode_listsecurity(d->d_inode, list, size); + if (size && error > size) + error = -ERANGE; + } + return error; +} +EXPORT_SYMBOL_GPL(vfs_listxattr); + int vfs_removexattr(struct dentry *dentry, char *name) { @@ -346,17 +366,7 @@ listxattr(struct dentry *d, char __user *list, size_t size) return -ENOMEM; } - error = security_inode_listxattr(d); - if (error) - goto out; - error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { - error = d->d_inode->i_op->listxattr(d, klist, size); - } else { - error = security_inode_listsecurity(d->d_inode, klist, size); - if (size && error > size) - error = -ERANGE; - } + error = vfs_listxattr(d, klist, size); if (error > 0) { if (size && copy_to_user(list, klist, error)) error = -EFAULT; @@ -365,7 +375,6 @@ listxattr(struct dentry *d, char __user *list, size_t size) than XATTR_LIST_MAX bytes. Not possible. */ error = -E2BIG; } -out: kfree(klist); return error; } diff --git a/include/acpi/aclocal.h b/include/acpi/aclocal.h index a4d0e73d5aca..063c4b54290f 100644 --- a/include/acpi/aclocal.h +++ b/include/acpi/aclocal.h @@ -708,7 +708,7 @@ struct acpi_bit_register_info { * must be preserved. */ #define ACPI_PM1_STATUS_PRESERVED_BITS 0x0800 /* Bit 11 */ -#define ACPI_PM1_CONTROL_PRESERVED_BITS 0x0201 /* Bit 9, Bit 0 (SCI_EN) */ +#define ACPI_PM1_CONTROL_PRESERVED_BITS 0x0200 /* Bit 9 (whatever) */ /* * Register IDs diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h index f5ae98c25d1f..5d15af24573b 100644 --- a/include/asm-alpha/io.h +++ b/include/asm-alpha/io.h @@ -533,19 +533,6 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); #define eth_io_copy_and_sum(skb,src,len,unused) \ memcpy_fromio((skb)->data,src,len) -static inline int -check_signature(const volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - do { - if (readb(io_addr) != *signature) - return 0; - io_addr++; - signature++; - } while (--length); - return 1; -} - /* * The Alpha Jensen hardware for some rather strange reason puts * the RTC clock at 0x170 instead of 0x70. Probably due to some diff --git a/include/asm-alpha/irq_regs.h b/include/asm-alpha/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-alpha/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-alpha/machvec.h b/include/asm-alpha/machvec.h index aced22f91752..a86c083cdf7f 100644 --- a/include/asm-alpha/machvec.h +++ b/include/asm-alpha/machvec.h @@ -15,7 +15,6 @@ struct task_struct; struct mm_struct; -struct pt_regs; struct vm_area_struct; struct linux_hose_info; struct pci_dev; @@ -79,8 +78,8 @@ struct alpha_machine_vector void (*update_irq_hw)(unsigned long, unsigned long, int); void (*ack_irq)(unsigned long); - void (*device_interrupt)(unsigned long vector, struct pt_regs *regs); - void (*machine_check)(u64 vector, u64 la, struct pt_regs *regs); + void (*device_interrupt)(unsigned long vector); + void (*machine_check)(u64 vector, u64 la); void (*smp_callin)(void); void (*init_arch)(void); diff --git a/include/asm-arm/arch-clps711x/time.h b/include/asm-arm/arch-clps711x/time.h index 0e4a3901d3b3..5edaae1c61d3 100644 --- a/include/asm-arm/arch-clps711x/time.h +++ b/include/asm-arm/arch-clps711x/time.h @@ -26,8 +26,9 @@ extern void clps711x_setup_timer(void); * IRQ handler for the timer */ static irqreturn_t -p720t_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +p720t_timer_interrupt(int irq, void *dev_id) { + struct pt_regs *regs = get_irq_regs(); do_leds(); do_timer(1); #ifndef CONFIG_SMP diff --git a/include/asm-arm/arch-imx/imx-dma.h b/include/asm-arm/arch-imx/imx-dma.h index 599f03e5a9ef..5b1066da4e1f 100644 --- a/include/asm-arm/arch-imx/imx-dma.h +++ b/include/asm-arm/arch-imx/imx-dma.h @@ -45,8 +45,8 @@ struct imx_dma_channel { const char *name; - void (*irq_handler) (int, void *, struct pt_regs *); - void (*err_handler) (int, void *, struct pt_regs *, int errcode); + void (*irq_handler) (int, void *); + void (*err_handler) (int, void *, int errcode); void *data; dmamode_t dma_mode; struct scatterlist *sg; @@ -77,8 +77,8 @@ imx_dma_setup_sg(imx_dmach_t dma_ch, int imx_dma_setup_handlers(imx_dmach_t dma_ch, - void (*irq_handler) (int, void *, struct pt_regs *), - void (*err_handler) (int, void *, struct pt_regs *, int), void *data); + void (*irq_handler) (int, void *), + void (*err_handler) (int, void *, int), void *data); void imx_dma_enable(imx_dmach_t dma_ch); diff --git a/include/asm-arm/arch-l7200/time.h b/include/asm-arm/arch-l7200/time.h index c69cb508735f..ea22f7fff9cd 100644 --- a/include/asm-arm/arch-l7200/time.h +++ b/include/asm-arm/arch-l7200/time.h @@ -43,8 +43,9 @@ * Handler for RTC timer interrupt */ static irqreturn_t -timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +timer_interrupt(int irq, void *dev_id) { + struct pt_regs *regs = get_irq_regs(); do_timer(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); diff --git a/include/asm-arm/arch-pnx4008/dma.h b/include/asm-arm/arch-pnx4008/dma.h index 3aee1204795b..418f15283ff1 100644 --- a/include/asm-arm/arch-pnx4008/dma.h +++ b/include/asm-arm/arch-pnx4008/dma.h @@ -137,7 +137,7 @@ extern void pnx4008_free_ll_entry(struct pnx4008_dma_ll *, dma_addr_t); extern void pnx4008_free_ll(u32 ll_dma, struct pnx4008_dma_ll *); extern int pnx4008_request_channel(char *, int, - void (*)(int, int, void *, struct pt_regs *), + void (*)(int, int, void *), void *); extern void pnx4008_free_channel(int); extern int pnx4008_config_dma(int, int, int); diff --git a/include/asm-arm/arch-pxa/dma.h b/include/asm-arm/arch-pxa/dma.h index a008150abc59..bed042d71d68 100644 --- a/include/asm-arm/arch-pxa/dma.h +++ b/include/asm-arm/arch-pxa/dma.h @@ -56,7 +56,7 @@ for ( \ int pxa_request_dma (char *name, pxa_dma_prio prio, - void (*irq_handler)(int, void *, struct pt_regs *), + void (*irq_handler)(int, void *), void *data); void pxa_free_dma (int dma_ch); diff --git a/include/asm-arm/arch-pxa/mmc.h b/include/asm-arm/arch-pxa/mmc.h index 88c17dd02ed2..a38a28c4bbd8 100644 --- a/include/asm-arm/arch-pxa/mmc.h +++ b/include/asm-arm/arch-pxa/mmc.h @@ -10,7 +10,7 @@ struct mmc_host; struct pxamci_platform_data { unsigned int ocr_mask; /* available voltages */ unsigned long detect_delay; /* delay in jiffies before detecting cards after interrupt */ - int (*init)(struct device *, irqreturn_t (*)(int, void *, struct pt_regs *), void *); + int (*init)(struct device *, irq_handler_t , void *); int (*get_ro)(struct device *); void (*setpower)(struct device *, unsigned int); void (*exit)(struct device *, void *); diff --git a/include/asm-arm/arch-versatile/hardware.h b/include/asm-arm/arch-versatile/hardware.h index 41c1bee342ad..edc06598d187 100644 --- a/include/asm-arm/arch-versatile/hardware.h +++ b/include/asm-arm/arch-versatile/hardware.h @@ -28,8 +28,8 @@ /* * PCI space virtual addresses */ -#define VERSATILE_PCI_VIRT_BASE 0xe8000000 -#define VERSATILE_PCI_CFG_VIRT_BASE 0xe9000000 +#define VERSATILE_PCI_VIRT_BASE (void __iomem *)0xe8000000ul +#define VERSATILE_PCI_CFG_VIRT_BASE (void __iomem *)0xe9000000ul #if 0 #define VERSATILE_PCI_VIRT_MEM_BASE0 0xf4000000 diff --git a/include/asm-arm/hardware/sharpsl_pm.h b/include/asm-arm/hardware/sharpsl_pm.h index a836e76a14f7..2d00db22b981 100644 --- a/include/asm-arm/hardware/sharpsl_pm.h +++ b/include/asm-arm/hardware/sharpsl_pm.h @@ -100,7 +100,7 @@ extern struct sharpsl_pm_status sharpsl_pm; void sharpsl_battery_kick(void); void sharpsl_pm_led(int val); -irqreturn_t sharpsl_ac_isr(int irq, void *dev_id, struct pt_regs *fp); -irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id, struct pt_regs *fp); -irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id, struct pt_regs *fp); +irqreturn_t sharpsl_ac_isr(int irq, void *dev_id); +irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id); +irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id); diff --git a/include/asm-arm/hw_irq.h b/include/asm-arm/hw_irq.h index ea856971989a..98d594a973d6 100644 --- a/include/asm-arm/hw_irq.h +++ b/include/asm-arm/hw_irq.h @@ -12,7 +12,7 @@ if (!(action->flags & IRQF_TIMER) && system_timer->dyn_tick) { \ write_seqlock(&xtime_lock); \ if (system_timer->dyn_tick->state & DYN_TICK_ENABLED) \ - system_timer->dyn_tick->handler(irq, 0, regs); \ + system_timer->dyn_tick->handler(irq, NULL); \ write_sequnlock(&xtime_lock); \ } #endif diff --git a/include/asm-arm/io.h b/include/asm-arm/io.h index 8076a85c3675..ae999fd5dc67 100644 --- a/include/asm-arm/io.h +++ b/include/asm-arm/io.h @@ -63,7 +63,7 @@ extern void __raw_readsl(const void __iomem *addr, void *data, int longlen); */ extern void __iomem * __ioremap_pfn(unsigned long, unsigned long, size_t, unsigned long); extern void __iomem * __ioremap(unsigned long, size_t, unsigned long); -extern void __iounmap(void __iomem *addr); +extern void __iounmap(volatile void __iomem *addr); /* * Bad read/write accesses... @@ -193,23 +193,6 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define eth_io_copy_and_sum(s,c,l,b) \ eth_copy_and_sum((s),__mem_pci(c),(l),(b)) -static inline int -check_signature(void __iomem *io_addr, const unsigned char *signature, - int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - #elif !defined(readb) #define readb(c) (__readwrite_bug("readb"),0) diff --git a/include/asm-arm/irq_regs.h b/include/asm-arm/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-arm/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-arm/mach/irq.h b/include/asm-arm/mach/irq.h index 131f33733d25..0e017ecf2096 100644 --- a/include/asm-arm/mach/irq.h +++ b/include/asm-arm/mach/irq.h @@ -30,10 +30,9 @@ extern int show_fiq_list(struct seq_file *, void *); /* * Obsolete inline function for calling irq descriptor handlers. */ -static inline void desc_handle_irq(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +static inline void desc_handle_irq(unsigned int irq, struct irq_desc *desc) { - desc->handle_irq(irq, desc, regs); + desc->handle_irq(irq, desc); } void set_irq_flags(unsigned int irq, unsigned int flags); @@ -51,10 +50,10 @@ void set_irq_flags(unsigned int irq, unsigned int flags); #define irqdesc irq_desc #define irqchip irq_chip -#define do_bad_IRQ(irq,desc,regs) \ +#define do_bad_IRQ(irq,desc) \ do { \ spin_lock(&desc->lock); \ - handle_bad_irq(irq, desc, regs); \ + handle_bad_irq(irq, desc); \ spin_unlock(&desc->lock); \ } while(0) diff --git a/include/asm-arm/mach/time.h b/include/asm-arm/mach/time.h index 1eb93f5c0d6c..5dc357013b79 100644 --- a/include/asm-arm/mach/time.h +++ b/include/asm-arm/mach/time.h @@ -57,7 +57,7 @@ struct dyn_tick_timer { int (*enable)(void); /* Enables dynamic tick */ int (*disable)(void); /* Disables dynamic tick */ void (*reprogram)(unsigned long); /* Reprograms the timer */ - int (*handler)(int, void *, struct pt_regs *); + int (*handler)(int, void *); }; void timer_dyn_reprogram(void); @@ -66,7 +66,7 @@ void timer_dyn_reprogram(void); #endif extern struct sys_timer *system_timer; -extern void timer_tick(struct pt_regs *); +extern void timer_tick(void); /* * Kernel time keeping support. diff --git a/include/asm-arm/uaccess.h b/include/asm-arm/uaccess.h index 87aba57a66c4..09ad0cab9014 100644 --- a/include/asm-arm/uaccess.h +++ b/include/asm-arm/uaccess.h @@ -110,7 +110,7 @@ extern int __get_user_4(void *); #define get_user(x,p) \ ({ \ const register typeof(*(p)) __user *__p asm("r0") = (p);\ - register unsigned int __r2 asm("r2"); \ + register unsigned long __r2 asm("r2"); \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ diff --git a/include/asm-avr32/irq_regs.h b/include/asm-avr32/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-avr32/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-frv/io.h b/include/asm-frv/io.h index 7765f5528894..20e44fe00abf 100644 --- a/include/asm-frv/io.h +++ b/include/asm-frv/io.h @@ -385,27 +385,6 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) */ #define xlate_dev_kmem_ptr(p) p -/* - * Check BIOS signature - */ -static inline int check_signature(volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - - retval = 1; -out: - return retval; -} - #endif /* __KERNEL__ */ #endif /* _ASM_IO_H */ diff --git a/include/asm-generic/bitops/sched.h b/include/asm-generic/bitops/sched.h index 5ef93a4d009f..815bb0148060 100644 --- a/include/asm-generic/bitops/sched.h +++ b/include/asm-generic/bitops/sched.h @@ -15,7 +15,7 @@ static inline int sched_find_first_bit(const unsigned long *b) #if BITS_PER_LONG == 64 if (unlikely(b[0])) return __ffs(b[0]); - if (unlikely(b[1])) + if (likely(b[1])) return __ffs(b[1]) + 64; return __ffs(b[2]) + 128; #elif BITS_PER_LONG == 32 diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index a5250895155e..1d9573cf4a0b 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -41,14 +41,14 @@ #endif #endif -#define WARN_ON_ONCE(condition) ({ \ - static int __warn_once = 1; \ - typeof(condition) __ret_warn_once = (condition);\ - \ - if (likely(__warn_once)) \ - if (WARN_ON(__ret_warn_once)) \ - __warn_once = 0; \ - unlikely(__ret_warn_once); \ +#define WARN_ON_ONCE(condition) ({ \ + static int __warned; \ + typeof(condition) __ret_warn_once = (condition); \ + \ + if (unlikely(__ret_warn_once)) \ + if (WARN_ON(!__warned)) \ + __warned = 1; \ + unlikely(__ret_warn_once); \ }) #ifdef CONFIG_SMP diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 6d45ee5472af..196376262240 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -15,7 +15,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*({ \ - extern int simple_indentifier_##var(void); \ + extern int simple_identifier_##var(void); \ RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) #define __get_cpu_var(var) per_cpu(var, smp_processor_id()) #define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id()) diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h index 8806c7e002a7..0bedbdf5e907 100644 --- a/include/asm-i386/hw_irq.h +++ b/include/asm-i386/hw_irq.h @@ -26,9 +26,6 @@ * Interrupt entry/exit code at both C and assembly level */ -extern u8 irq_vector[NR_IRQ_VECTORS]; -#define IO_APIC_VECTOR(irq) (irq_vector[irq]) - extern void (*interrupt[NR_IRQS])(void); #ifdef CONFIG_SMP diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index b3724fe93ff1..68df0dc3ab8f 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -224,33 +224,6 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d)) -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(volatile void __iomem * io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * Cache management * diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 6aa1206f6e2a..bd59c1508e71 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -46,8 +46,6 @@ extern u8 x86_cpu_to_apicid[]; #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] -extern u8 apicid_2_node[]; - #ifdef CONFIG_HOTPLUG_CPU extern void cpu_exit_clear(void); extern void cpu_uninit(void); @@ -101,6 +99,9 @@ extern unsigned int num_processors; #endif #ifndef __ASSEMBLY__ + +extern u8 apicid_2_node[]; + #ifdef CONFIG_X86_LOCAL_APIC static __inline int logical_smp_processor_id(void) { diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h index 54d905ebc63d..eef5133b9ce2 100644 --- a/include/asm-i386/uaccess.h +++ b/include/asm-i386/uaccess.h @@ -404,20 +404,6 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to, * anything, so this is accurate. */ -/** - * __copy_to_user: - Copy a block of data into user space, with less checking. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from kernel space to user space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { @@ -439,35 +425,27 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) return __copy_to_user_ll(to, from, n); } -static __always_inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) -{ - might_sleep(); - return __copy_to_user_inatomic(to, from, n); -} - /** - * __copy_from_user: - Copy a block of data from user space, with less checking. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. * @n: Number of bytes to copy. * * Context: User context only. This function may sleep. * - * Copy data from user space to kernel space. Caller must check + * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. * * Returns number of bytes that could not be copied. * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - * - * An alternate version - __copy_from_user_inatomic() - may be called from - * atomic context and will fail rather than sleep. In this case the - * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h - * for explanation of why this is needed. */ +static __always_inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_sleep(); + return __copy_to_user_inatomic(to, from, n); +} + static __always_inline unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { @@ -493,6 +471,29 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) } return __copy_from_user_ll_nozero(to, from, n); } + +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + * + * An alternate version - __copy_from_user_inatomic() - may be called from + * atomic context and will fail rather than sleep. In this case the + * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h + * for explanation of why this is needed. + */ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 3ca7ab963d7d..beeeaf6b054a 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -324,10 +324,11 @@ #define __NR_vmsplice 316 #define __NR_move_pages 317 #define __NR_getcpu 318 +#define __NR_epoll_pwait 319 #ifdef __KERNEL__ -#define NR_syscalls 319 +#define NR_syscalls 320 #include <linux/err.h> /* diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index e3b0c3fe5eed..da3eade0cae2 100644 --- a/include/asm-ia64/sn/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -135,7 +135,7 @@ extern void pcireg_intr_addr_addr_set(struct pcibus_info *, int, u64 extern void pcireg_force_intr_set(struct pcibus_info *, int); extern u64 pcireg_wrb_flush_get(struct pcibus_info *, int); extern void pcireg_int_ate_set(struct pcibus_info *, int, u64); -extern u64 * pcireg_int_ate_addr(struct pcibus_info *, int); +extern u64 __iomem * pcireg_int_ate_addr(struct pcibus_info *, int); extern void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info); extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info); extern int pcibr_ate_alloc(struct pcibus_info *, int); diff --git a/include/asm-ia64/sn/tioca_provider.h b/include/asm-ia64/sn/tioca_provider.h index 65cdd73c2a57..9a820ac61be3 100644 --- a/include/asm-ia64/sn/tioca_provider.h +++ b/include/asm-ia64/sn/tioca_provider.h @@ -162,11 +162,11 @@ static inline void tioca_tlbflush(struct tioca_kernel *tioca_kernel) { volatile u64 tmp; - volatile struct tioca *ca_base; + volatile struct tioca __iomem *ca_base; struct tioca_common *tioca_common; tioca_common = tioca_kernel->ca_common; - ca_base = (struct tioca *)tioca_common->ca_common.bs_base; + ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base; /* * Explicit flushes not needed if GART is in cached mode diff --git a/include/asm-ia64/sn/tioce_provider.h b/include/asm-ia64/sn/tioce_provider.h index 6d62b13f7ae7..32c32f30b099 100644 --- a/include/asm-ia64/sn/tioce_provider.h +++ b/include/asm-ia64/sn/tioce_provider.h @@ -53,7 +53,7 @@ struct tioce_dmamap { u64 ct_start; /* coretalk start address */ u64 pci_start; /* bus start address */ - u64 *ate_hw; /* hw ptr of first ate in map */ + u64 __iomem *ate_hw;/* hw ptr of first ate in map */ u64 *ate_shadow; /* shadow ptr of firat ate */ u16 ate_count; /* # ate's in the map */ }; diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h index 35e1386f37ab..1d45e1518fb3 100644 --- a/include/asm-ia64/sn/xpc.h +++ b/include/asm-ia64/sn/xpc.h @@ -669,7 +669,7 @@ extern struct device *xpc_part; extern struct device *xpc_chan; extern int xpc_disengage_request_timelimit; extern int xpc_disengage_request_timedout; -extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); +extern irqreturn_t xpc_notify_IRQ_handler(int, void *); extern void xpc_dropped_IPI_check(struct xpc_partition *); extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); diff --git a/include/asm-m32r/io.h b/include/asm-m32r/io.h index 70ad1c949c2b..d06933bd6318 100644 --- a/include/asm-m32r/io.h +++ b/include/asm-m32r/io.h @@ -166,38 +166,6 @@ static inline void _writel(unsigned long l, unsigned long addr) #define flush_write_buffers() do { } while (0) /* M32R_FIXME */ -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the ISA mmio address io_addr. - * Returns 1 on a match. - * - * This function is deprecated. New drivers should use ioremap and - * check_signature. - */ - -static inline int check_signature(void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; -#if 0 -printk("check_signature\n"); - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: -#endif - return retval; -} - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/include/asm-m32r/irq_regs.h b/include/asm-m32r/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-m32r/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-m68k/atari_stdma.h b/include/asm-m68k/atari_stdma.h index b4eadf852738..8e389b7fa70c 100644 --- a/include/asm-m68k/atari_stdma.h +++ b/include/asm-m68k/atari_stdma.h @@ -8,8 +8,7 @@ /***************************** Prototypes *****************************/ -void stdma_lock(irqreturn_t (*handler)(int, void *, struct pt_regs *), - void *data); +void stdma_lock(irq_handler_t handler, void *data); void stdma_release( void ); int stdma_others_waiting( void ); int stdma_islocked( void ); diff --git a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h index cebbb03370ec..d90d841d3dfd 100644 --- a/include/asm-m68k/dma-mapping.h +++ b/include/asm-m68k/dma-mapping.h @@ -5,6 +5,7 @@ struct scatterlist; +#ifndef CONFIG_MMU_SUN3 static inline int dma_supported(struct device *dev, u64 mask) { return 1; @@ -26,7 +27,7 @@ static inline int dma_is_consistent(dma_addr_t dma_addr) } extern void *dma_alloc_coherent(struct device *, size_t, - dma_addr_t *, int); + dma_addr_t *, gfp_t); extern void dma_free_coherent(struct device *, size_t, void *, dma_addr_t); @@ -88,4 +89,8 @@ static inline int dma_mapping_error(dma_addr_t handle) return 0; } +#else +#include <asm-generic/dma-mapping-broken.h> +#endif + #endif /* _M68K_DMA_MAPPING_H */ diff --git a/include/asm-m68k/floppy.h b/include/asm-m68k/floppy.h index 57f4fdda65ab..45dc908932a3 100644 --- a/include/asm-m68k/floppy.h +++ b/include/asm-m68k/floppy.h @@ -17,8 +17,7 @@ #include <linux/vmalloc.h> -asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id, - struct pt_regs *regs); +asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id); /* constants... */ @@ -184,8 +183,7 @@ static void fd_disable_dma(void) /* this is the only truly Q40 specific function */ -asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id, - struct pt_regs *regs) +asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id) { register unsigned char st; @@ -198,7 +196,7 @@ asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id, static int dma_wait=0; #endif if(!doing_pdma) { - floppy_interrupt(irq, dev_id, regs); + floppy_interrupt(irq, dev_id); return IRQ_HANDLED; } @@ -246,7 +244,7 @@ asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id, dma_wait=0; #endif doing_pdma = 0; - floppy_interrupt(irq, dev_id, regs); + floppy_interrupt(irq, dev_id); return IRQ_HANDLED; } #ifdef TRACE_FLPY_INT diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h index 365f76fb8013..f9ffb2cbbae8 100644 --- a/include/asm-m68k/ide.h +++ b/include/asm-m68k/ide.h @@ -123,7 +123,7 @@ static __inline__ void ide_release_lock (void) } static __inline__ void -ide_get_lock(irqreturn_t (*handler)(int, void *, struct pt_regs *), void *data) +ide_get_lock(irq_handler_t handler, void *data) { if (MACH_IS_ATARI) { if (falconide_intr_lock == 0) { diff --git a/include/asm-m68k/irq.h b/include/asm-m68k/irq.h index 3257f9881002..4901cb105e2f 100644 --- a/include/asm-m68k/irq.h +++ b/include/asm-m68k/irq.h @@ -83,7 +83,7 @@ struct pt_regs; * interrupt source (if it supports chaining). */ typedef struct irq_node { - int (*handler)(int, void *, struct pt_regs *); + int (*handler)(int, void *); void *dev_id; struct irq_node *next; unsigned long flags; @@ -93,12 +93,12 @@ typedef struct irq_node { /* * This structure has only 4 elements for speed reasons */ -typedef struct irq_handler { - int (*handler)(int, void *, struct pt_regs *); +struct irq_handler { + int (*handler)(int, void *); unsigned long flags; void *dev_id; const char *devname; -} irq_handler_t; +}; struct irq_controller { const char *name; @@ -122,6 +122,7 @@ extern void m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt, void (*handler)(unsigned int, struct pt_regs *)); extern void m68k_setup_irq_controller(struct irq_controller *, unsigned int, unsigned int); -asmlinkage void m68k_handle_int(unsigned int, struct pt_regs *); +asmlinkage void m68k_handle_int(unsigned int); +asmlinkage void __m68k_handle_int(unsigned int, struct pt_regs *); #endif /* _M68K_IRQ_H_ */ diff --git a/include/asm-m68k/irq_regs.h b/include/asm-m68k/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-m68k/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-m68k/mac_iop.h b/include/asm-m68k/mac_iop.h index b0d2e3473537..a2c7e6fcca38 100644 --- a/include/asm-m68k/mac_iop.h +++ b/include/asm-m68k/mac_iop.h @@ -143,17 +143,17 @@ struct iop_msg { int status; /* status of this message */ __u8 message[IOP_MSG_LEN]; /* the message being sent/received */ __u8 reply[IOP_MSG_LEN]; /* the reply to the message */ - void (*handler)(struct iop_msg *, struct pt_regs *); + void (*handler)(struct iop_msg *); /* function to call when reply recvd */ }; extern int iop_scc_present,iop_ism_present; extern int iop_listen(uint, uint, - void (*handler)(struct iop_msg *, struct pt_regs *), + void (*handler)(struct iop_msg *), const char *); extern int iop_send_message(uint, uint, void *, uint, __u8 *, - void (*)(struct iop_msg *, struct pt_regs *)); + void (*)(struct iop_msg *)); extern void iop_complete_message(struct iop_msg *); extern void iop_upload_code(uint, __u8 *, uint, __u16); extern void iop_download_code(uint, __u8 *, uint, __u16); diff --git a/include/asm-m68k/machdep.h b/include/asm-m68k/machdep.h index df898f27e434..26d2b91209c5 100644 --- a/include/asm-m68k/machdep.h +++ b/include/asm-m68k/machdep.h @@ -10,7 +10,7 @@ struct rtc_time; struct rtc_pll_info; struct buffer_head; -extern void (*mach_sched_init) (irqreturn_t (*handler)(int, void *, struct pt_regs *)); +extern void (*mach_sched_init) (irq_handler_t handler); /* machine dependent irq functions */ extern void (*mach_init_IRQ) (void); extern void (*mach_get_model) (char *model); diff --git a/include/asm-m68k/signal.h b/include/asm-m68k/signal.h index de1ba6ead3b4..3db8a81942f1 100644 --- a/include/asm-m68k/signal.h +++ b/include/asm-m68k/signal.h @@ -198,6 +198,7 @@ static inline int sigfindinword(unsigned long word) return word ^ 31; } +struct pt_regs; extern void ptrace_signal_deliver(struct pt_regs *regs, void *cookie); #endif /* __KERNEL__ */ diff --git a/include/asm-m68k/string.h b/include/asm-m68k/string.h index 6c59215b285e..2eb7df1e0f5d 100644 --- a/include/asm-m68k/string.h +++ b/include/asm-m68k/string.h @@ -1,138 +1,114 @@ #ifndef _M68K_STRING_H_ #define _M68K_STRING_H_ -#include <asm/setup.h> -#include <asm/page.h> +#include <linux/types.h> +#include <linux/compiler.h> -#define __HAVE_ARCH_STRCPY -static inline char * strcpy(char * dest,const char *src) +static inline size_t __kernel_strlen(const char *s) { - char *xdest = dest; - - __asm__ __volatile__ - ("1:\tmoveb %1@+,%0@+\n\t" - "jne 1b" - : "=a" (dest), "=a" (src) - : "0" (dest), "1" (src) : "memory"); - return xdest; -} + const char *sc; -#define __HAVE_ARCH_STRNCPY -static inline char * strncpy(char *dest, const char *src, size_t n) -{ - char *xdest = dest; - - if (n == 0) - return xdest; - - __asm__ __volatile__ - ("1:\tmoveb %1@+,%0@+\n\t" - "jeq 2f\n\t" - "subql #1,%2\n\t" - "jne 1b\n\t" - "2:" - : "=a" (dest), "=a" (src), "=d" (n) - : "0" (dest), "1" (src), "2" (n) - : "memory"); - return xdest; + for (sc = s; *sc++; ) + ; + return sc - s - 1; } -#define __HAVE_ARCH_STRCAT -static inline char * strcat(char * dest, const char * src) +static inline char *__kernel_strcpy(char *dest, const char *src) { - char *tmp = dest; - - while (*dest) - dest++; - while ((*dest++ = *src++)) - ; - - return tmp; + char *xdest = dest; + + asm volatile ("\n" + "1: move.b (%1)+,(%0)+\n" + " jne 1b" + : "+a" (dest), "+a" (src) + : : "memory"); + return xdest; } -#define __HAVE_ARCH_STRNCAT -static inline char * strncat(char *dest, const char *src, size_t count) -{ - char *tmp = dest; - - if (count) { - while (*dest) - dest++; - while ((*dest++ = *src++)) { - if (--count == 0) { - *dest++='\0'; - break; - } - } - } +#ifndef __IN_STRING_C - return tmp; -} +#define __HAVE_ARCH_STRLEN +#define strlen(s) (__builtin_constant_p(s) ? \ + __builtin_strlen(s) : \ + __kernel_strlen(s)) -#define __HAVE_ARCH_STRCHR -static inline char * strchr(const char * s, int c) +#define __HAVE_ARCH_STRNLEN +static inline size_t strnlen(const char *s, size_t count) { - const char ch = c; - - for(; *s != ch; ++s) - if (*s == '\0') - return( NULL ); - return( (char *) s); + const char *sc = s; + + asm volatile ("\n" + "1: subq.l #1,%1\n" + " jcs 2f\n" + " tst.b (%0)+\n" + " jne 1b\n" + " subq.l #1,%0\n" + "2:" + : "+a" (sc), "+d" (count)); + return sc - s; } -/* strstr !! */ +#define __HAVE_ARCH_STRCPY +#if __GNUC__ >= 4 +#define strcpy(d, s) (__builtin_constant_p(s) && \ + __builtin_strlen(s) <= 32 ? \ + __builtin_strcpy(d, s) : \ + __kernel_strcpy(d, s)) +#else +#define strcpy(d, s) __kernel_strcpy(d, s) +#endif -#define __HAVE_ARCH_STRLEN -static inline size_t strlen(const char * s) +#define __HAVE_ARCH_STRNCPY +static inline char *strncpy(char *dest, const char *src, size_t n) { - const char *sc; - for (sc = s; *sc != '\0'; ++sc) ; - return(sc - s); + char *xdest = dest; + + asm volatile ("\n" + " jra 2f\n" + "1: move.b (%1),(%0)+\n" + " jeq 2f\n" + " addq.l #1,%1\n" + "2: subq.l #1,%2\n" + " jcc 1b\n" + : "+a" (dest), "+a" (src), "+d" (n) + : : "memory"); + return xdest; } -/* strnlen !! */ +#define __HAVE_ARCH_STRCAT +#define strcat(d, s) ({ \ + char *__d = (d); \ + strcpy(__d + strlen(__d), (s)); \ +}) -#define __HAVE_ARCH_STRCMP -static inline int strcmp(const char * cs,const char * ct) +#define __HAVE_ARCH_STRCHR +static inline char *strchr(const char *s, int c) { - char __res; - - __asm__ - ("1:\tmoveb %0@+,%2\n\t" /* get *cs */ - "cmpb %1@+,%2\n\t" /* compare a byte */ - "jne 2f\n\t" /* not equal, break out */ - "tstb %2\n\t" /* at end of cs? */ - "jne 1b\n\t" /* no, keep going */ - "jra 3f\n\t" /* strings are equal */ - "2:\tsubb %1@-,%2\n\t" /* *cs - *ct */ - "3:" - : "=a" (cs), "=a" (ct), "=d" (__res) - : "0" (cs), "1" (ct)); - return __res; + char sc, ch = c; + + for (; (sc = *s++) != ch; ) { + if (!sc) + return NULL; + } + return (char *)s - 1; } -#define __HAVE_ARCH_STRNCMP -static inline int strncmp(const char * cs,const char * ct,size_t count) +#define __HAVE_ARCH_STRCMP +static inline int strcmp(const char *cs, const char *ct) { - char __res; - - if (!count) - return 0; - __asm__ - ("1:\tmovb %0@+,%3\n\t" /* get *cs */ - "cmpb %1@+,%3\n\t" /* compare a byte */ - "jne 3f\n\t" /* not equal, break out */ - "tstb %3\n\t" /* at end of cs? */ - "jeq 4f\n\t" /* yes, all done */ - "subql #1,%2\n\t" /* no, adjust count */ - "jne 1b\n\t" /* more to do, keep going */ - "2:\tmoveq #0,%3\n\t" /* strings are equal */ - "jra 4f\n\t" - "3:\tsubb %1@-,%3\n\t" /* *cs - *ct */ - "4:" - : "=a" (cs), "=a" (ct), "=d" (count), "=d" (__res) - : "0" (cs), "1" (ct), "2" (count)); - return __res; + char res; + + asm ("\n" + "1: move.b (%0)+,%2\n" /* get *cs */ + " cmp.b (%1)+,%2\n" /* compare a byte */ + " jne 2f\n" /* not equal, break out */ + " tst.b %2\n" /* at end of cs? */ + " jne 1b\n" /* no, keep going */ + " jra 3f\n" /* strings are equal */ + "2: sub.b -(%1),%2\n" /* *cs - *ct */ + "3:" + : "+a" (cs), "+a" (ct), "=d" (res)); + return res; } #define __HAVE_ARCH_MEMSET @@ -150,4 +126,6 @@ extern void *memmove(void *, const void *, __kernel_size_t); extern int memcmp(const void *, const void *, __kernel_size_t); #define memcmp(d, s, n) __builtin_memcmp(d, s, n) +#endif + #endif /* _M68K_STRING_H_ */ diff --git a/include/asm-m68k/sun3xflop.h b/include/asm-m68k/sun3xflop.h index ca8cc4113843..32c45f84ac60 100644 --- a/include/asm-m68k/sun3xflop.h +++ b/include/asm-m68k/sun3xflop.h @@ -111,8 +111,7 @@ static void sun3x_82072_fd_outb(unsigned char value, int port) } -asmlinkage irqreturn_t sun3xflop_hardint(int irq, void *dev_id, - struct pt_regs * regs) +asmlinkage irqreturn_t sun3xflop_hardint(int irq, void *dev_id) { register unsigned char st; @@ -125,7 +124,7 @@ asmlinkage irqreturn_t sun3xflop_hardint(int irq, void *dev_id, static int dma_wait=0; #endif if(!doing_pdma) { - floppy_interrupt(irq, dev_id, regs); + floppy_interrupt(irq, dev_id); return IRQ_HANDLED; } @@ -189,7 +188,7 @@ asmlinkage irqreturn_t sun3xflop_hardint(int irq, void *dev_id, dma_wait=0; #endif - floppy_interrupt(irq, dev_id, regs); + floppy_interrupt(irq, dev_id); return IRQ_HANDLED; } diff --git a/include/asm-m68k/system.h b/include/asm-m68k/system.h index 131a0cb0f491..243dd13e6bfc 100644 --- a/include/asm-m68k/system.h +++ b/include/asm-m68k/system.h @@ -78,13 +78,13 @@ static inline int irqs_disabled(void) #define mb() barrier() #define rmb() barrier() #define wmb() barrier() -#define read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { xchg(&var, value); } while (0) +#define read_barrier_depends() ((void)0) +#define set_mb(var, value) ({ (var) = (value); wmb(); }) #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) +#define smp_read_barrier_depends() ((void)0) #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) diff --git a/include/asm-m68k/uaccess.h b/include/asm-m68k/uaccess.h index 88b1f47400e1..e4c9f080ff20 100644 --- a/include/asm-m68k/uaccess.h +++ b/include/asm-m68k/uaccess.h @@ -76,7 +76,7 @@ asm volatile ("\n" \ break; \ case 8: \ { \ - const void *__pu_ptr = (ptr); \ + const void __user *__pu_ptr = (ptr); \ asm volatile ("\n" \ "1: moves.l %2,(%1)+\n" \ "2: moves.l %R2,(%1)\n" \ @@ -125,7 +125,7 @@ asm volatile ("\n" \ " .previous" \ : "+d" (res), "=&" #reg (__gu_val) \ : "m" (*(ptr)), "i" (err)); \ - (x) = (typeof(*(ptr)))(long)__gu_val; \ + (x) = (typeof(*(ptr)))(unsigned long)__gu_val; \ }) #define __get_user(x, ptr) \ @@ -221,16 +221,16 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __get_user_asm(res, *(u8 *)to, (u8 *)from, u8, b, d, 1); + __get_user_asm(res, *(u8 *)to, (u8 __user *)from, u8, b, d, 1); break; case 2: - __get_user_asm(res, *(u16 *)to, (u16 *)from, u16, w, d, 2); + __get_user_asm(res, *(u16 *)to, (u16 __user *)from, u16, w, d, 2); break; case 3: __constant_copy_from_user_asm(res, to, from, tmp, 3, w, b,); break; case 4: - __get_user_asm(res, *(u32 *)to, (u32 *)from, u32, l, r, 4); + __get_user_asm(res, *(u32 *)to, (u32 __user *)from, u32, l, r, 4); break; case 5: __constant_copy_from_user_asm(res, to, from, tmp, 5, l, b,); @@ -302,16 +302,16 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) switch (n) { case 1: - __put_user_asm(res, *(u8 *)from, (u8 *)to, b, d, 1); + __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1); break; case 2: - __put_user_asm(res, *(u16 *)from, (u16 *)to, w, d, 2); + __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, d, 2); break; case 3: __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,); break; case 4: - __put_user_asm(res, *(u32 *)from, (u32 *)to, l, r, 4); + __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4); break; case 5: __constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,); diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h index 3ab716f0fc18..ad4348058c66 100644 --- a/include/asm-m68k/unistd.h +++ b/include/asm-m68k/unistd.h @@ -284,10 +284,39 @@ #define __NR_add_key 279 #define __NR_request_key 280 #define __NR_keyctl 281 +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +#define __NR_migrate_pages 287 +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 +#define __NR_futimesat 292 +#define __NR_fstatat64 293 +#define __NR_unlinkat 294 +#define __NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +#define __NR_move_pages 310 #ifdef __KERNEL__ -#define NR_syscalls 282 +#define NR_syscalls 311 #include <linux/err.h> /* user-visible error numbers are in the range -1 - -MAX_ERRNO: see diff --git a/include/asm-m68k/user.h b/include/asm-m68k/user.h index e8d5a64c7e79..d7c0b109bd45 100644 --- a/include/asm-m68k/user.h +++ b/include/asm-m68k/user.h @@ -81,7 +81,7 @@ struct user{ unsigned long magic; /* To uniquely identify a core file */ char u_comm[32]; /* User command that was responsible */ }; -#define NBPG PAGE_SIZE +#define NBPG 4096 #define UPAGES 1 #define HOST_TEXT_START_ADDR (u.start_code) #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) diff --git a/include/asm-mips/dec/ecc.h b/include/asm-mips/dec/ecc.h index 19495a490e72..707ffdbc9add 100644 --- a/include/asm-mips/dec/ecc.h +++ b/include/asm-mips/dec/ecc.h @@ -49,8 +49,7 @@ struct pt_regs; extern void dec_ecc_be_init(void); extern int dec_ecc_be_handler(struct pt_regs *regs, int is_fixup); -extern irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id, - struct pt_regs *regs); +extern irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id); #endif #endif /* __ASM_MIPS_DEC_ECC_H */ diff --git a/include/asm-mips/dec/kn01.h b/include/asm-mips/dec/kn01.h index eb522aa1e226..28fa717ac423 100644 --- a/include/asm-mips/dec/kn01.h +++ b/include/asm-mips/dec/kn01.h @@ -84,8 +84,7 @@ extern spinlock_t kn01_lock; extern void dec_kn01_be_init(void); extern int dec_kn01_be_handler(struct pt_regs *regs, int is_fixup); -extern irqreturn_t dec_kn01_be_interrupt(int irq, void *dev_id, - struct pt_regs *regs); +extern irqreturn_t dec_kn01_be_interrupt(int irq, void *dev_id); #endif #endif /* __ASM_MIPS_DEC_KN01_H */ diff --git a/include/asm-mips/dec/kn02xa.h b/include/asm-mips/dec/kn02xa.h index a25f3d7da7f7..b56b4577f6ef 100644 --- a/include/asm-mips/dec/kn02xa.h +++ b/include/asm-mips/dec/kn02xa.h @@ -78,8 +78,7 @@ struct pt_regs; extern void dec_kn02xa_be_init(void); extern int dec_kn02xa_be_handler(struct pt_regs *regs, int is_fixup); -extern irqreturn_t dec_kn02xa_be_interrupt(int irq, void *dev_id, - struct pt_regs *regs); +extern irqreturn_t dec_kn02xa_be_interrupt(int irq, void *dev_id); #endif #endif /* __ASM_MIPS_DEC_KN02XA_H */ diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h index 58c561a9ec6b..efef843b93f0 100644 --- a/include/asm-mips/fpu.h +++ b/include/asm-mips/fpu.h @@ -134,9 +134,11 @@ static inline void restore_fp(struct task_struct *tsk) static inline fpureg_t *get_fpu_regs(struct task_struct *tsk) { - if (cpu_has_fpu) { - if ((tsk == current) && __is_fpu_owner()) + if (tsk == current) { + preempt_disable(); + if (is_fpu_owner()) _save_fp(current); + preempt_enable(); } return tsk->thread.fpu.fpr; diff --git a/include/asm-mips/io.h b/include/asm-mips/io.h index df624e1ee6e2..c2d124badbe5 100644 --- a/include/asm-mips/io.h +++ b/include/asm-mips/io.h @@ -562,32 +562,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *); #define eth_io_copy_and_sum(skb,src,len,unused) memcpy_fromio((skb)->data,(src),(len)) /* - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ -static inline int check_signature(char __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that * can be applied to dma buffers. diff --git a/include/asm-mips/irq.h b/include/asm-mips/irq.h index d35c61776a02..0ce2a80b689e 100644 --- a/include/asm-mips/irq.h +++ b/include/asm-mips/irq.h @@ -24,9 +24,7 @@ static inline int irq_canonicalize(int irq) #define irq_canonicalize(irq) (irq) /* Sane hardware, sane code ... */ #endif -struct pt_regs; - -extern asmlinkage unsigned int do_IRQ(unsigned int irq, struct pt_regs *regs); +extern asmlinkage unsigned int do_IRQ(unsigned int irq); #ifdef CONFIG_MIPS_MT_SMTC /* @@ -55,18 +53,18 @@ do { \ * Ideally there should be away to get this into kernel/irq/handle.c to * avoid the overhead of a call for just a tiny function ... */ -#define do_IRQ(irq, regs) \ +#define do_IRQ(irq) \ do { \ irq_enter(); \ __DO_IRQ_SMTC_HOOK(); \ - __do_IRQ((irq), (regs)); \ + __do_IRQ((irq)); \ irq_exit(); \ } while (0) #endif extern void arch_init_irq(void); -extern void spurious_interrupt(struct pt_regs *regs); +extern void spurious_interrupt(void); #ifdef CONFIG_MIPS_MT_SMTC struct irqaction; diff --git a/include/asm-mips/irq_regs.h b/include/asm-mips/irq_regs.h index 3dd9c0b70270..33bd2a06de57 100644 --- a/include/asm-mips/irq_regs.h +++ b/include/asm-mips/irq_regs.h @@ -1 +1,21 @@ -#include <asm-generic/irq_regs.h> +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + */ +#ifndef __ASM_IRQ_REGS_H +#define __ASM_IRQ_REGS_H + +#define ARCH_HAS_OWN_IRQ_REGS + +#include <linux/thread_info.h> + +static inline struct pt_regs *get_irq_regs(void) +{ + return current_thread_info()->regs; +} + +#endif /* __ASM_IRQ_REGS_H */ diff --git a/include/asm-mips/jmr3927/irq.h b/include/asm-mips/jmr3927/irq.h index fe551f33a74f..e3e7ed38da6c 100644 --- a/include/asm-mips/jmr3927/irq.h +++ b/include/asm-mips/jmr3927/irq.h @@ -45,10 +45,6 @@ extern int toshibaboards_setup_irq(int irq, struct irqaction * new); -#ifdef CONFIG_TX_BRANCH_LIKELY_BUG_WORKAROUND -extern void tx_branch_likely_bug_fixup(struct pt_regs *regs); -#endif - extern int (*toshibaboards_gen_iack)(void); #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-mips/mach-au1x00/au1000_dma.h b/include/asm-mips/mach-au1x00/au1000_dma.h index 810f2fa33444..9f29520e8fb0 100644 --- a/include/asm-mips/mach-au1x00/au1000_dma.h +++ b/include/asm-mips/mach-au1x00/au1000_dma.h @@ -123,8 +123,7 @@ struct dma_chan { extern struct dma_chan au1000_dma_table[]; extern int request_au1000_dma(int dev_id, const char *dev_str, - irqreturn_t (*irqhandler)(int, void *, - struct pt_regs *), + irq_handler_t irqhandler, unsigned long irqflags, void *irq_dev_id); extern void free_au1000_dma(unsigned int dmanr); diff --git a/include/asm-mips/mach-au1x00/au1000_usbdev.h b/include/asm-mips/mach-au1x00/au1000_usbdev.h deleted file mode 100644 index 05bc74bed0b1..000000000000 --- a/include/asm-mips/mach-au1x00/au1000_usbdev.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * Au1000 USB Device-Side Driver - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * stevel@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define USBDEV_REV 0x0110 // BCD -#define USBDEV_EP0_MAX_PACKET_SIZE 64 - -typedef enum { - ATTACHED = 0, - POWERED, - DEFAULT, - ADDRESS, - CONFIGURED -} usbdev_state_t; - -typedef enum { - CB_NEW_STATE = 0, - CB_PKT_COMPLETE -} usbdev_cb_type_t; - - -typedef struct usbdev_pkt { - int ep_addr; // ep addr this packet routed to - int size; // size of payload in bytes - unsigned status; // packet status - struct usbdev_pkt* next; // function layer can't touch this - u8 payload[0]; // the payload -} usbdev_pkt_t; - -#define PKT_STATUS_ACK (1<<0) -#define PKT_STATUS_NAK (1<<1) -#define PKT_STATUS_SU (1<<2) - -extern int usbdev_init(struct usb_device_descriptor* dev_desc, - struct usb_config_descriptor* config_desc, - struct usb_interface_descriptor* if_desc, - struct usb_endpoint_descriptor* ep_desc, - struct usb_string_descriptor* str_desc[], - void (*cb)(usbdev_cb_type_t, unsigned long, void *), - void* cb_data); - -extern void usbdev_exit(void); - -extern int usbdev_alloc_packet (int ep_addr, int data_size, - usbdev_pkt_t** pkt); -extern int usbdev_send_packet (int ep_addr, usbdev_pkt_t* pkt); -extern int usbdev_receive_packet(int ep_addr, usbdev_pkt_t** pkt); -extern int usbdev_get_byte_count(int ep_addr); diff --git a/include/asm-mips/marvell.h b/include/asm-mips/marvell.h index 6bb2125bb053..df94955b098a 100644 --- a/include/asm-mips/marvell.h +++ b/include/asm-mips/marvell.h @@ -53,6 +53,6 @@ struct mv_pci_controller { unsigned long config_vreg; }; -extern void ll_mv64340_irq(struct pt_regs *regs); +extern void ll_mv64340_irq(void); #endif /* __ASM_MIPS_MARVELL_H */ diff --git a/include/asm-mips/msc01_ic.h b/include/asm-mips/msc01_ic.h index 64f17208d602..aa7ad9a71762 100644 --- a/include/asm-mips/msc01_ic.h +++ b/include/asm-mips/msc01_ic.h @@ -145,7 +145,7 @@ typedef struct msc_irqmap { #define MSC01_IRQ_EDGE 1 extern void __init init_msc_irqs(unsigned int base, msc_irqmap_t *imp, int nirq); -extern void ll_msc_irq(struct pt_regs *regs); +extern void ll_msc_irq(void); #endif /* __ASM_MIPS_BOARDS_MSC01_IC_H */ diff --git a/include/asm-mips/stackframe.h b/include/asm-mips/stackframe.h index 158a4cd12e46..1fae5dc58138 100644 --- a/include/asm-mips/stackframe.h +++ b/include/asm-mips/stackframe.h @@ -59,69 +59,43 @@ .endm #ifdef CONFIG_SMP - .macro get_saved_sp /* SMP variation */ -#ifdef CONFIG_32BIT #ifdef CONFIG_MIPS_MT_SMTC - .set mips32 - mfc0 k0, CP0_TCBIND; - .set mips0 - lui k1, %hi(kernelsp) - srl k0, k0, 19 - /* No need to shift down and up to clear bits 0-1 */ +#define PTEBASE_SHIFT 19 /* TCBIND */ #else - mfc0 k0, CP0_CONTEXT - lui k1, %hi(kernelsp) - srl k0, k0, 23 -#endif - addu k1, k0 - LONG_L k1, %lo(kernelsp)(k1) +#define PTEBASE_SHIFT 23 /* CONTEXT */ #endif -#ifdef CONFIG_64BIT + .macro get_saved_sp /* SMP variation */ #ifdef CONFIG_MIPS_MT_SMTC - .set mips64 - mfc0 k0, CP0_TCBIND; - .set mips0 - lui k0, %highest(kernelsp) - dsrl k1, 19 - /* No need to shift down and up to clear bits 0-2 */ + mfc0 k0, CP0_TCBIND #else - MFC0 k1, CP0_CONTEXT - lui k0, %highest(kernelsp) - dsrl k1, 23 - daddiu k0, %higher(kernelsp) - dsll k0, k0, 16 - daddiu k0, %hi(kernelsp) - dsll k0, k0, 16 -#endif /* CONFIG_MIPS_MT_SMTC */ - daddu k1, k1, k0 + MFC0 k0, CP0_CONTEXT +#endif +#if defined(CONFIG_BUILD_ELF64) || (defined(CONFIG_64BIT) && __GNUC__ < 4) + lui k1, %highest(kernelsp) + daddiu k1, %higher(kernelsp) + dsll k1, 16 + daddiu k1, %hi(kernelsp) + dsll k1, 16 +#else + lui k1, %hi(kernelsp) +#endif + LONG_SRL k0, PTEBASE_SHIFT + LONG_ADDU k1, k0 LONG_L k1, %lo(kernelsp)(k1) -#endif /* CONFIG_64BIT */ .endm .macro set_saved_sp stackp temp temp2 -#ifdef CONFIG_32BIT -#ifdef CONFIG_MIPS_MT_SMTC - mfc0 \temp, CP0_TCBIND - srl \temp, 19 -#else - mfc0 \temp, CP0_CONTEXT - srl \temp, 23 -#endif -#endif -#ifdef CONFIG_64BIT #ifdef CONFIG_MIPS_MT_SMTC mfc0 \temp, CP0_TCBIND - dsrl \temp, 19 #else MFC0 \temp, CP0_CONTEXT - dsrl \temp, 23 -#endif #endif + LONG_SRL \temp, PTEBASE_SHIFT LONG_S \stackp, kernelsp(\temp) .endm #else .macro get_saved_sp /* Uniprocessor variation */ -#ifdef CONFIG_64BIT +#if defined(CONFIG_BUILD_ELF64) || (defined(CONFIG_64BIT) && __GNUC__ < 4) lui k1, %highest(kernelsp) daddiu k1, %higher(kernelsp) dsll k1, k1, 16 diff --git a/include/asm-mips/termbits.h b/include/asm-mips/termbits.h index fa6d04dac56b..b62ec7c521cc 100644 --- a/include/asm-mips/termbits.h +++ b/include/asm-mips/termbits.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1995, 1996, 1999, 2001 Ralf Baechle + * Copyright (C) 1995, 96, 99, 2001, 06 Ralf Baechle * Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) 2001 MIPS Technologies, Inc. */ @@ -13,14 +13,8 @@ #include <linux/posix_types.h> typedef unsigned char cc_t; -#if (_MIPS_SZLONG == 32) -typedef unsigned long speed_t; -typedef unsigned long tcflag_t; -#endif -#if (_MIPS_SZLONG == 64) -typedef __u32 speed_t; -typedef __u32 tcflag_t; -#endif +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; /* * The ABI says nothing about NCC but seems to use NCCS as diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h index ae8ada5b42a9..e475c45ea263 100644 --- a/include/asm-mips/thread_info.h +++ b/include/asm-mips/thread_info.h @@ -34,6 +34,7 @@ struct thread_info { 0-0xFFFFFFFF for kernel-thread */ struct restart_block restart_block; + struct pt_regs *regs; }; /* diff --git a/include/asm-mips/time.h b/include/asm-mips/time.h index 30f21df39253..28512ba2266e 100644 --- a/include/asm-mips/time.h +++ b/include/asm-mips/time.h @@ -72,13 +72,13 @@ extern irqreturn_t timer_interrupt(int irq, void *dev_id); /* * the corresponding low-level timer interrupt routine. */ -extern asmlinkage void ll_timer_interrupt(int irq, struct pt_regs *regs); +extern asmlinkage void ll_timer_interrupt(int irq); /* * profiling and process accouting is done separately in local_timer_interrupt */ extern void local_timer_interrupt(int irq, void *dev_id); -extern asmlinkage void ll_local_timer_interrupt(int irq, struct pt_regs *regs); +extern asmlinkage void ll_local_timer_interrupt(int irq); /* * board specific routines required by time_init(). diff --git a/include/asm-parisc/irq_regs.h b/include/asm-parisc/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-parisc/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-parisc/pdc.h b/include/asm-parisc/pdc.h index c9b2e35326ee..423c2b84b4a0 100644 --- a/include/asm-parisc/pdc.h +++ b/include/asm-parisc/pdc.h @@ -774,8 +774,6 @@ int pdc_sti_call(unsigned long func, unsigned long flags, unsigned long inptr, unsigned long outputr, unsigned long glob_cfg); -extern void pdc_init(void); - static inline char * os_id_to_string(u16 os_id) { switch(os_id) { case OS_ID_NONE: return "No OS"; diff --git a/include/asm-powerpc/i8259.h b/include/asm-powerpc/i8259.h index c80e113052cd..78489fb8d140 100644 --- a/include/asm-powerpc/i8259.h +++ b/include/asm-powerpc/i8259.h @@ -6,10 +6,10 @@ #ifdef CONFIG_PPC_MERGE extern void i8259_init(struct device_node *node, unsigned long intack_addr); -extern unsigned int i8259_irq(struct pt_regs *regs); +extern unsigned int i8259_irq(void); #else extern void i8259_init(unsigned long intack_addr, int offset); -extern int i8259_irq(struct pt_regs *regs); +extern int i8259_irq(void); #endif #endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/ibmebus.h b/include/asm-powerpc/ibmebus.h index 7ab195a27888..3493429b70f5 100644 --- a/include/asm-powerpc/ibmebus.h +++ b/include/asm-powerpc/ibmebus.h @@ -65,7 +65,7 @@ void ibmebus_unregister_driver(struct ibmebus_driver *drv); int ibmebus_request_irq(struct ibmebus_dev *dev, u32 ist, - irqreturn_t (*handler)(int, void*, struct pt_regs *), + irq_handler_t handler, unsigned long irq_flags, const char * devname, void *dev_id); void ibmebus_free_irq(struct ibmebus_dev *dev, u32 ist, void *dev_id); diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index cbbd8c648df1..3baff8b0fd5a 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -404,32 +404,6 @@ static inline void __out_be64(volatile unsigned long __iomem *addr, unsigned lon #include <asm/eeh.h> -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ -static inline int check_signature(const volatile void __iomem * io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) diff --git a/include/asm-powerpc/ipic.h b/include/asm-powerpc/ipic.h index 1ce09a35906e..9fbb03415860 100644 --- a/include/asm-powerpc/ipic.h +++ b/include/asm-powerpc/ipic.h @@ -79,12 +79,12 @@ extern void ipic_clear_mcp_status(u32 mask); #ifdef CONFIG_PPC_MERGE extern void ipic_init(struct device_node *node, unsigned int flags); -extern unsigned int ipic_get_irq(struct pt_regs *regs); +extern unsigned int ipic_get_irq(void); #else extern void ipic_init(phys_addr_t phys_addr, unsigned int flags, unsigned int irq_offset, unsigned char *senses, unsigned int senses_count); -extern int ipic_get_irq(struct pt_regs *regs); +extern int ipic_get_irq(void); #endif #endif /* __ASM_IPIC_H__ */ diff --git a/include/asm-powerpc/iseries/hv_lp_event.h b/include/asm-powerpc/iseries/hv_lp_event.h index 4065a4de4935..6ce2ce1e2690 100644 --- a/include/asm-powerpc/iseries/hv_lp_event.h +++ b/include/asm-powerpc/iseries/hv_lp_event.h @@ -50,7 +50,7 @@ struct HvLpEvent { u64 xCorrelationToken; /* Unique value for source/type x10-x17 */ }; -typedef void (*LpEventHandler)(struct HvLpEvent *, struct pt_regs *); +typedef void (*LpEventHandler)(struct HvLpEvent *); /* Register a handler for an event type - returns 0 on success */ extern int HvLpEvent_registerHandler(HvLpEvent_Type eventType, diff --git a/include/asm-powerpc/iseries/it_lp_queue.h b/include/asm-powerpc/iseries/it_lp_queue.h index 3f6814769295..428278838821 100644 --- a/include/asm-powerpc/iseries/it_lp_queue.h +++ b/include/asm-powerpc/iseries/it_lp_queue.h @@ -72,7 +72,7 @@ struct hvlpevent_queue { extern struct hvlpevent_queue hvlpevent_queue; extern int hvlpevent_is_pending(void); -extern void process_hvlpevents(struct pt_regs *); +extern void process_hvlpevents(void); extern void setup_hvlpevent_queue(void); #endif /* _ASM_POWERPC_ISERIES_IT_LP_QUEUE_H */ diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index c17c13742401..dac90dc341cb 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -97,7 +97,7 @@ struct machdep_calls { void (*show_percpuinfo)(struct seq_file *m, int i); void (*init_IRQ)(void); - unsigned int (*get_irq)(struct pt_regs *); + unsigned int (*get_irq)(void); #ifdef CONFIG_KEXEC void (*kexec_cpu_down)(int crash_shutdown, int secondary); #endif diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h index a9f9604b9eff..ef0a5458d2b2 100644 --- a/include/asm-powerpc/mpic.h +++ b/include/asm-powerpc/mpic.h @@ -409,9 +409,9 @@ extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask); void smp_mpic_message_pass(int target, int msg); /* Fetch interrupt from a given mpic */ -extern unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); +extern unsigned int mpic_get_one_irq(struct mpic *mpic); /* This one gets to the primary mpic */ -extern unsigned int mpic_get_irq(struct pt_regs *regs); +extern unsigned int mpic_get_irq(void); /* Set the EPIC clock ratio */ void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio); diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h index 3a9fcc15811b..8fb96811b55d 100644 --- a/include/asm-powerpc/reg.h +++ b/include/asm-powerpc/reg.h @@ -503,7 +503,7 @@ /* * An mtfsf instruction with the L bit set. On CPUs that support this a - * full 64bits of FPSCR is restored and on other CPUs it is ignored. + * full 64bits of FPSCR is restored and on other CPUs the L bit is ignored. * * Until binutils gets the new form of mtfsf, hardwire the instruction. */ diff --git a/include/asm-ppc/commproc.h b/include/asm-ppc/commproc.h index 3247bea5fc2b..7b06b4e6bf30 100644 --- a/include/asm-ppc/commproc.h +++ b/include/asm-ppc/commproc.h @@ -690,8 +690,7 @@ typedef struct risc_timer_pram { #define CICR_IEN ((uint)0x00000080) /* Int. enable */ #define CICR_SPS ((uint)0x00000001) /* SCC Spread */ -extern void cpm_install_handler(int vec, - void (*handler)(void *, struct pt_regs *regs), void *dev_id); +extern void cpm_install_handler(int vec, void (*handler)(void *), void *dev_id); extern void cpm_free_handler(int vec); #endif /* __CPM_8XX__ */ diff --git a/include/asm-ppc/floppy.h b/include/asm-ppc/floppy.h index d3963ca79ad8..ae316e6d2ca9 100644 --- a/include/asm-ppc/floppy.h +++ b/include/asm-ppc/floppy.h @@ -38,14 +38,14 @@ static int virtual_dma_mode; static int doing_vdma; static struct fd_dma_ops *fd_ops; -static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) +static irqreturn_t floppy_hardint(int irq, void *dev_id) { unsigned char st; int lcount; char *lptr; if (!doing_vdma) - return floppy_interrupt(irq, dev_id, regs); + return floppy_interrupt(irq, dev_id); st = 1; @@ -69,7 +69,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) virtual_dma_residue += virtual_dma_count; virtual_dma_count=0; doing_vdma = 0; - floppy_interrupt(irq, dev_id, regs); + floppy_interrupt(irq, dev_id); return IRQ_HANDLED; } return IRQ_HANDLED; diff --git a/include/asm-ppc/gt64260.h b/include/asm-ppc/gt64260.h index cd0ef644943d..9e63b3cfffca 100644 --- a/include/asm-ppc/gt64260.h +++ b/include/asm-ppc/gt64260.h @@ -315,7 +315,7 @@ int gt64260_get_base(u32 *base); int gt64260_pci_exclude_device(u8 bus, u8 devfn); void gt64260_init_irq(void); -int gt64260_get_irq(struct pt_regs *regs); +int gt64260_get_irq(void); void gt64260_mpsc_progress(char *s, unsigned short hex); diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 3d9a9e6f3321..a4c411b753ef 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -439,22 +439,6 @@ extern inline void * phys_to_virt(unsigned long address) #define iobarrier_r() eieio() #define iobarrier_w() eieio() -static inline int check_signature(volatile void __iomem * io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * Here comes the ppc implementation of the IOMAP * interfaces. diff --git a/include/asm-ppc/machdep.h b/include/asm-ppc/machdep.h index da7746738aee..293a444a1d77 100644 --- a/include/asm-ppc/machdep.h +++ b/include/asm-ppc/machdep.h @@ -43,7 +43,7 @@ struct machdep_calls { /* Optional, may be NULL. */ unsigned int (*irq_canonicalize)(unsigned int irq); void (*init_IRQ)(void); - int (*get_irq)(struct pt_regs *); + int (*get_irq)(void); /* A general init function, called by ppc_init in init/main.c. May be NULL. DEPRECATED ! */ diff --git a/include/asm-ppc/mpc52xx.h b/include/asm-ppc/mpc52xx.h index 7e9842805a28..64c8874618dc 100644 --- a/include/asm-ppc/mpc52xx.h +++ b/include/asm-ppc/mpc52xx.h @@ -415,7 +415,7 @@ struct mpc52xx_cdm { #ifndef __ASSEMBLY__ extern void mpc52xx_init_irq(void); -extern int mpc52xx_get_irq(struct pt_regs *regs); +extern int mpc52xx_get_irq(void); extern unsigned long mpc52xx_find_end_of_memory(void); extern void mpc52xx_set_bat(void); diff --git a/include/asm-ppc/mv64x60.h b/include/asm-ppc/mv64x60.h index 663edbee3e91..db3776f18198 100644 --- a/include/asm-ppc/mv64x60.h +++ b/include/asm-ppc/mv64x60.h @@ -336,9 +336,9 @@ int mv64x60_pci_exclude_device(u8 bus, u8 devfn); void gt64260_init_irq(void); -int gt64260_get_irq(struct pt_regs *regs); +int gt64260_get_irq(void); void mv64360_init_irq(void); -int mv64360_get_irq(struct pt_regs *regs); +int mv64360_get_irq(void); u32 mv64x60_mask(u32 val, u32 num_bits); u32 mv64x60_shift_left(u32 val, u32 num_bits); diff --git a/include/asm-ppc/open_pic.h b/include/asm-ppc/open_pic.h index a4fe962d9f73..778d5726212c 100644 --- a/include/asm-ppc/open_pic.h +++ b/include/asm-ppc/open_pic.h @@ -48,12 +48,12 @@ extern void openpic_init(int linux_irq_offset); extern void openpic_init_nmi_irq(u_int irq); extern void openpic_set_irq_priority(u_int irq, u_int pri); extern void openpic_hookup_cascade(u_int irq, char *name, - int (*cascade_fn)(struct pt_regs *)); + int (*cascade_fn)(void)); extern u_int openpic_irq(void); extern void openpic_eoi(void); extern void openpic_request_IPIs(void); extern void do_openpic_setup_cpu(void); -extern int openpic_get_irq(struct pt_regs *regs); +extern int openpic_get_irq(void); extern void openpic_reset_processor_phys(u_int cpumask); extern void openpic_setup_ISU(int isu_num, unsigned long addr); extern void openpic_cause_IPI(u_int ipi, cpumask_t cpumask); @@ -93,6 +93,6 @@ extern void openpic2_init(int linux_irq_offset); extern void openpic2_init_nmi_irq(u_int irq); extern u_int openpic2_irq(void); extern void openpic2_eoi(void); -extern int openpic2_get_irq(struct pt_regs *regs); +extern int openpic2_get_irq(void); extern void openpic2_setup_ISU(int isu_num, unsigned long addr); #endif /* _PPC_KERNEL_OPEN_PIC_H */ diff --git a/include/asm-ppc/smp.h b/include/asm-ppc/smp.h index 0b7fa89589df..e75791ea33a6 100644 --- a/include/asm-ppc/smp.h +++ b/include/asm-ppc/smp.h @@ -39,7 +39,7 @@ extern struct smp_ops_t *smp_ops; extern void smp_send_tlb_invalidate(int); extern void smp_send_xmon_break(int cpu); struct pt_regs; -extern void smp_message_recv(int, struct pt_regs *); +extern void smp_message_recv(int); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h index da063cd5f0a0..81287d86329d 100644 --- a/include/asm-s390/cio.h +++ b/include/asm-s390/cio.h @@ -275,6 +275,12 @@ struct ccw_dev_id { u16 devno; }; +static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, + struct ccw_dev_id *dev_id2) +{ + return !memcmp(dev_id1, dev_id2, sizeof(struct ccw_dev_id)); +} + extern int diag210(struct diag210 *addr); extern void wait_cons_dev(void); diff --git a/include/asm-s390/hardirq.h b/include/asm-s390/hardirq.h index e84b7ef54aac..c2f6a8782d31 100644 --- a/include/asm-s390/hardirq.h +++ b/include/asm-s390/hardirq.h @@ -32,6 +32,6 @@ typedef struct { #define HARDIRQ_BITS 8 -extern void account_ticks(struct pt_regs *); +extern void account_ticks(void); #endif /* __ASM_HARDIRQ_H */ diff --git a/include/asm-s390/irq_regs.h b/include/asm-s390/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-s390/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index 495ad99c7635..9ea7f1023e57 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -16,7 +16,7 @@ #if defined(__s390x__) && defined(MODULE) #define __reloc_hide(var,offset) (*({ \ - extern int simple_indentifier_##var(void); \ + extern int simple_identifier_##var(void); \ unsigned long *__ptr; \ asm ( "larl %0,per_cpu__"#var"@GOTENT" \ : "=a" (__ptr) : "X" (per_cpu__##var) ); \ @@ -25,7 +25,7 @@ #else #define __reloc_hide(var, offset) (*({ \ - extern int simple_indentifier_##var(void); \ + extern int simple_identifier_##var(void); \ unsigned long __ptr; \ asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ (typeof(&per_cpu__##var)) (__ptr + (offset)); })) diff --git a/include/asm-s390/s390_ext.h b/include/asm-s390/s390_ext.h index e9a2862b230d..df9b1017b703 100644 --- a/include/asm-s390/s390_ext.h +++ b/include/asm-s390/s390_ext.h @@ -10,7 +10,7 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ -typedef void (*ext_int_handler_t)(struct pt_regs *regs, __u16 code); +typedef void (*ext_int_handler_t)(__u16 code); /* * Warning: if you change ext_int_info_t you have to change the diff --git a/include/asm-s390/timer.h b/include/asm-s390/timer.h index fcd6c256a2d1..30e5cbe570f2 100644 --- a/include/asm-s390/timer.h +++ b/include/asm-s390/timer.h @@ -26,7 +26,7 @@ struct vtimer_list { spinlock_t lock; unsigned long magic; - void (*function)(unsigned long, struct pt_regs*); + void (*function)(unsigned long); unsigned long data; }; diff --git a/include/asm-sh/io.h b/include/asm-sh/io.h index ed12d38e8c00..a0e55b09e4fd 100644 --- a/include/asm-sh/io.h +++ b/include/asm-sh/io.h @@ -304,22 +304,6 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags) #define iounmap(addr) \ __iounmap((addr)) -static inline int check_signature(char __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that diff --git a/include/asm-sh64/io.h b/include/asm-sh64/io.h index 252fedbb6621..14d8e7b4bf4b 100644 --- a/include/asm-sh64/io.h +++ b/include/asm-sh64/io.h @@ -178,22 +178,6 @@ extern void iounmap(void *addr); unsigned long onchip_remap(unsigned long addr, unsigned long size, const char* name); extern void onchip_unmap(unsigned long vaddr); -static __inline__ int check_signature(volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that diff --git a/include/asm-sparc/floppy.h b/include/asm-sparc/floppy.h index c53b332c850a..9073c84218ce 100644 --- a/include/asm-sparc/floppy.h +++ b/include/asm-sparc/floppy.h @@ -262,7 +262,7 @@ static __inline__ void sun_fd_enable_dma(void) } /* Our low-level entry point in arch/sparc/kernel/entry.S */ -irqreturn_t floppy_hardint(int irq, void *unused, struct pt_regs *regs); +irqreturn_t floppy_hardint(int irq, void *unused); static int sun_fd_request_irq(void) { diff --git a/include/asm-sparc/irq.h b/include/asm-sparc/irq.h index 3141ddfea97d..ff520ea97473 100644 --- a/include/asm-sparc/irq.h +++ b/include/asm-sparc/irq.h @@ -76,8 +76,8 @@ static inline void load_profile_irq(int cpu, int limit) BTFIXUP_CALL(load_profile_irq)(cpu, limit); } -extern void (*sparc_init_timers)(irqreturn_t (*lvl10_irq)(int, void *, struct pt_regs *)); -extern void claim_ticker14(irqreturn_t (*irq_handler)(int, void *, struct pt_regs *), +extern void (*sparc_init_timers)(irq_handler_t lvl10_irq); +extern void claim_ticker14(irq_handler_t irq_handler, int irq, unsigned int timeout); @@ -91,7 +91,7 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int) #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu) #endif -extern int request_fast_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long flags, __const__ char *devname); +extern int request_fast_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, __const__ char *devname); /* On the sun4m, just like the timers, we have both per-cpu and master * interrupt registers. diff --git a/include/asm-sparc/irq_regs.h b/include/asm-sparc/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-sparc/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-sparc/spinlock.h b/include/asm-sparc/spinlock.h index 557d08959d2f..de2249b267c6 100644 --- a/include/asm-sparc/spinlock.h +++ b/include/asm-sparc/spinlock.h @@ -129,6 +129,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) : /* no outputs */ : "r" (lp) : "g2", "g4", "memory", "cc"); + *(volatile __u32 *)&lp->lock = ~0U; } static inline int __raw_write_trylock(raw_rwlock_t *rw) @@ -144,15 +145,40 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) val = rw->lock & ~0xff; if (val) ((volatile u8*)&rw->lock)[3] = 0; + else + *(volatile u32*)&rw->lock = ~0U; } return (val == 0); } +static inline int __read_trylock(raw_rwlock_t *rw) +{ + register raw_rwlock_t *lp asm("g1"); + register int res asm("o0"); + lp = rw; + __asm__ __volatile__( + "mov %%o7, %%g4\n\t" + "call ___rw_read_try\n\t" + " ldstub [%%g1 + 3], %%g2\n" + : "=r" (res) + : "r" (lp) + : "g2", "g4", "memory", "cc"); + return res; +} + +#define __raw_read_trylock(lock) \ +({ unsigned long flags; \ + int res; \ + local_irq_save(flags); \ + res = __read_trylock(lock); \ + local_irq_restore(flags); \ + res; \ +}) + #define __raw_write_unlock(rw) do { (rw)->lock = 0; } while(0) #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) -#define __raw_read_trylock(lock) generic__raw_read_trylock(lock) #define _raw_spin_relax(lock) cpu_relax() #define _raw_read_relax(lock) cpu_relax() diff --git a/include/asm-sparc64/floppy.h b/include/asm-sparc64/floppy.h index abf150038019..dbe033e494db 100644 --- a/include/asm-sparc64/floppy.h +++ b/include/asm-sparc64/floppy.h @@ -208,7 +208,7 @@ static void sun_fd_enable_dma(void) pdma_areasize = pdma_size; } -irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) +irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie) { if (likely(doing_pdma)) { void __iomem *stat = (void __iomem *) fdc_status; @@ -255,7 +255,7 @@ irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) } main_interrupt: - return floppy_interrupt(irq, dev_cookie, regs); + return floppy_interrupt(irq, dev_cookie); } static int sun_fd_request_irq(void) @@ -311,7 +311,7 @@ struct sun_pci_dma_op { static struct sun_pci_dma_op sun_pci_dma_current = { -1U, 0, 0, NULL}; static struct sun_pci_dma_op sun_pci_dma_pending = { -1U, 0, 0, NULL}; -extern irqreturn_t floppy_interrupt(int irq, void *dev_id, struct pt_regs *regs); +extern irqreturn_t floppy_interrupt(int irq, void *dev_id); static unsigned char sun_pci_fd_inb(unsigned long port) { @@ -446,7 +446,7 @@ static int sun_pci_fd_eject(int drive) void sun_pci_fd_dma_callback(struct ebus_dma_info *p, int event, void *cookie) { - floppy_interrupt(0, NULL, NULL); + floppy_interrupt(0, NULL); } /* diff --git a/include/asm-sparc64/io.h b/include/asm-sparc64/io.h index 0056770e83ad..30b912d8e8bc 100644 --- a/include/asm-sparc64/io.h +++ b/include/asm-sparc64/io.h @@ -440,21 +440,6 @@ _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) #define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz) -static inline int check_signature(void __iomem *io_addr, - const unsigned char *signature, - int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature++) - goto out; - io_addr++; - } while (--length); - retval = 1; -out: - return retval; -} - #define mmiowb() #ifdef __KERNEL__ diff --git a/include/asm-sparc64/irq_regs.h b/include/asm-sparc64/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-sparc64/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-um/irq_regs.h b/include/asm-um/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-um/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h index 81e714665344..a0e9a4b93484 100644 --- a/include/asm-x86_64/genapic.h +++ b/include/asm-x86_64/genapic.h @@ -18,6 +18,7 @@ struct genapic { u32 int_dest_mode; int (*apic_id_registered)(void); cpumask_t (*target_cpus)(void); + cpumask_t (*vector_allocation_domain)(int cpu); void (*init_apic_ldr)(void); /* ipi */ void (*send_IPI_mask)(cpumask_t mask, int vector); diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index 53d0d9fd10d6..792dd52fcd70 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h @@ -74,10 +74,8 @@ #ifndef __ASSEMBLY__ -extern unsigned int irq_vector[NR_IRQ_VECTORS]; typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -#define IO_APIC_VECTOR(irq) (irq_vector[irq]) /* * Various low-level irq details needed by irq.c, process.c, diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h index 70e91fe76344..6ee9fadaaacb 100644 --- a/include/asm-x86_64/io.h +++ b/include/asm-x86_64/io.h @@ -254,33 +254,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) diff --git a/include/asm-x86_64/mach_apic.h b/include/asm-x86_64/mach_apic.h index d33422450c00..7b7115a0c1c9 100644 --- a/include/asm-x86_64/mach_apic.h +++ b/include/asm-x86_64/mach_apic.h @@ -17,6 +17,7 @@ #define INT_DELIVERY_MODE (genapic->int_delivery_mode) #define INT_DEST_MODE (genapic->int_dest_mode) #define TARGET_CPUS (genapic->target_cpus()) +#define vector_allocation_domain (genapic->vector_allocation_domain) #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define send_IPI_mask (genapic->send_IPI_mask) diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 285756010c51..5ed0ef340842 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -32,13 +32,13 @@ /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*({ \ - extern int simple_indentifier_##var(void); \ + extern int simple_identifier_##var(void); \ RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); })) #define __get_cpu_var(var) (*({ \ - extern int simple_indentifier_##var(void); \ + extern int simple_identifier_##var(void); \ RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) #define __raw_get_cpu_var(var) (*({ \ - extern int simple_indentifier_##var(void); \ + extern int simple_identifier_##var(void); \ RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) /* A macro to avoid #include hell... */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index dcc5de7cc487..64b4641904fe 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -46,7 +46,8 @@ * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf - * bitmap_parse(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region @@ -106,7 +107,9 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern int bitmap_scnprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); -extern int bitmap_parse(const char __user *ubuf, unsigned int ulen, +extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, + unsigned long *dst, int nbits); +extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern int bitmap_scnlistprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); @@ -270,6 +273,12 @@ static inline void bitmap_shift_left(unsigned long *dst, __bitmap_shift_left(dst, src, n, nbits); } +static inline int bitmap_parse(const char *buf, unsigned int buflen, + unsigned long *maskp, int nmaskbits) +{ + return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 26f7856ff812..d370d2cfe138 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -157,6 +157,7 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_CMD, REQ_TYPE_ATA_TASK, REQ_TYPE_ATA_TASKFILE, + REQ_TYPE_ATA_PC, }; /* diff --git a/include/linux/carta_random32.h b/include/linux/carta_random32.h new file mode 100644 index 000000000000..f6f3bd9f20b5 --- /dev/null +++ b/include/linux/carta_random32.h @@ -0,0 +1,29 @@ +/* + * Fast, simple, yet decent quality random number generator based on + * a paper by David G. Carta ("Two Fast Implementations of the + * `Minimal Standard' Random Number Generator," Communications of the + * ACM, January, 1990). + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian <eranian@hpl.hp.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#ifndef _LINUX_CARTA_RANDOM32_H_ +#define _LINUX_CARTA_RANDOM32_H_ + +u64 carta_random32(u64 seed); + +#endif /* _LINUX_CARTA_RANDOM32_H_ */ diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 3c9b0bc05123..bbbe7b4da0bb 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -749,7 +749,7 @@ struct request_sense { #define MRW_MODE_PC 0x03 struct mrw_feature_desc { - __u16 feature_code; + __be16 feature_code; #if defined(__BIG_ENDIAN_BITFIELD) __u8 reserved1 : 2; __u8 feature_version : 4; @@ -776,7 +776,7 @@ struct mrw_feature_desc { /* cf. mmc4r02g.pdf 5.3.10 Random Writable Feature (0020h) pg 197 of 635 */ struct rwrt_feature_desc { - __u16 feature_code; + __be16 feature_code; #if defined(__BIG_ENDIAN_BITFIELD) __u8 reserved1 : 2; __u8 feature_version : 4; @@ -803,7 +803,7 @@ struct rwrt_feature_desc { }; typedef struct { - __u16 disc_information_length; + __be16 disc_information_length; #if defined(__BIG_ENDIAN_BITFIELD) __u8 reserved1 : 3; __u8 erasable : 1; @@ -849,7 +849,7 @@ typedef struct { } disc_information; typedef struct { - __u16 track_information_length; + __be16 track_information_length; __u8 track_lsb; __u8 session_lsb; __u8 reserved1; @@ -880,12 +880,12 @@ typedef struct { __u8 lra_v : 1; __u8 reserved3 : 6; #endif - __u32 track_start; - __u32 next_writable; - __u32 free_blocks; - __u32 fixed_packet_size; - __u32 track_size; - __u32 last_rec_address; + __be32 track_start; + __be32 next_writable; + __be32 free_blocks; + __be32 fixed_packet_size; + __be32 track_size; + __be32 last_rec_address; } track_information; struct feature_header { @@ -896,12 +896,12 @@ struct feature_header { }; struct mode_page_header { - __u16 mode_data_length; + __be16 mode_data_length; __u8 medium_type; __u8 reserved1; __u8 reserved2; __u8 reserved3; - __u16 desc_length; + __be16 desc_length; }; #ifdef __KERNEL__ @@ -1106,7 +1106,7 @@ typedef struct { #endif __u8 session_format; __u8 reserved6; - __u32 packet_size; + __be32 packet_size; __u16 audio_pause; __u8 mcn[16]; __u8 isrc[16]; @@ -1151,7 +1151,7 @@ typedef struct { } rpc_state_t; struct event_header { - __u16 data_len; + __be16 data_len; #if defined(__BIG_ENDIAN_BITFIELD) __u8 nea : 1; __u8 reserved1 : 4; diff --git a/include/linux/compat.h b/include/linux/compat.h index ef5cd192784c..f4ebf96f5308 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -163,7 +163,7 @@ asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len); asmlinkage long -compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, +compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, compat_size_t __user *len_ptr); long compat_sys_semctl(int first, int second, int third, void __user *uptr); diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 4e1663d7691e..cfdb4f6a89d4 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -61,17 +61,23 @@ COMPATIBLE_IOCTL(FIGETBSZ) * Some need translations, these do not. */ COMPATIBLE_IOCTL(HDIO_GET_IDENTITY) -COMPATIBLE_IOCTL(HDIO_SET_DMA) -COMPATIBLE_IOCTL(HDIO_SET_UNMASKINTR) -COMPATIBLE_IOCTL(HDIO_SET_NOWERR) -COMPATIBLE_IOCTL(HDIO_SET_32BIT) -COMPATIBLE_IOCTL(HDIO_SET_MULTCOUNT) -COMPATIBLE_IOCTL(HDIO_DRIVE_CMD) COMPATIBLE_IOCTL(HDIO_DRIVE_TASK) -COMPATIBLE_IOCTL(HDIO_SET_PIO_MODE) -COMPATIBLE_IOCTL(HDIO_SET_NICE) -COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS) +COMPATIBLE_IOCTL(HDIO_DRIVE_CMD) +ULONG_IOCTL(HDIO_SET_MULTCOUNT) +ULONG_IOCTL(HDIO_SET_UNMASKINTR) +ULONG_IOCTL(HDIO_SET_KEEPSETTINGS) +ULONG_IOCTL(HDIO_SET_32BIT) +ULONG_IOCTL(HDIO_SET_NOWERR) +ULONG_IOCTL(HDIO_SET_DMA) +ULONG_IOCTL(HDIO_SET_PIO_MODE) +ULONG_IOCTL(HDIO_SET_NICE) +ULONG_IOCTL(HDIO_SET_WCACHE) +ULONG_IOCTL(HDIO_SET_ACOUSTIC) +ULONG_IOCTL(HDIO_SET_BUSSTATE) +ULONG_IOCTL(HDIO_SET_ADDRESS) COMPATIBLE_IOCTL(HDIO_SCAN_HWIF) +/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */ +COMPATIBLE_IOCTL(0x330) /* 0x02 -- Floppy ioctls */ COMPATIBLE_IOCTL(FDMSGON) COMPATIBLE_IOCTL(FDMSGOFF) diff --git a/include/linux/config.h b/include/linux/config.h deleted file mode 100644 index 479ffb0a22d8..000000000000 --- a/include/linux/config.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _LINUX_CONFIG_H -#define _LINUX_CONFIG_H -/* This file is no longer in use and kept only for backward compatibility. - * autoconf.h is now included via -imacros on the commandline - */ -#warning Including config.h is deprecated. -#include <linux/autoconf.h> - -#endif diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b268a3c0c376..d0e8c8b0e34d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -8,8 +8,8 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these cpumasks are based. * - * For details of cpumask_scnprintf() and cpumask_parse(), - * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c. + * For details of cpumask_scnprintf() and cpumask_parse_user(), + * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. * For details of cpulist_scnprintf() and cpulist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c @@ -49,7 +49,7 @@ * unsigned long *cpus_addr(mask) Array of unsigned long's in mask * * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing - * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask + * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing * int cpulist_parse(buf, map) Parse ascii string as cpulist * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) @@ -273,12 +273,12 @@ static inline int __cpumask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } -#define cpumask_parse(ubuf, ulen, dst) \ - __cpumask_parse((ubuf), (ulen), &(dst), NR_CPUS) -static inline int __cpumask_parse(const char __user *buf, int len, +#define cpumask_parse_user(ubuf, ulen, dst) \ + __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse_user(const char __user *buf, int len, cpumask_t *dstp, int nbits) { - return bitmap_parse(buf, len, dstp->bits, nbits); + return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define cpulist_scnprintf(buf, len, src) \ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 44605be59409..63f64a9a5bf7 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -230,6 +230,7 @@ extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); +extern void shrink_dcache_for_umount(struct super_block *); extern int d_invalidate(struct dentry *); /* only used at mount-time */ diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d6f4ec467a4b..53553c99cad6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -191,7 +191,7 @@ enum { /* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ struct dccp_so_feat { __u8 dccpsf_feat; - __u8 *dccpsf_val; + __u8 __user *dccpsf_val; __u8 dccpsf_len; }; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b3370ef5164d..2fa9f1144228 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -70,7 +70,6 @@ struct elevator_type { struct list_head list; struct elevator_ops ops; - struct elevator_type *elevator_type; struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h new file mode 100644 index 000000000000..498503ee613d --- /dev/null +++ b/include/linux/ext4_fs.h @@ -0,0 +1,994 @@ +/* + * linux/include/linux/ext4_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT4_FS_H +#define _LINUX_EXT4_FS_H + +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/magic.h> + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT4FS_DEBUG to produce debug messages + */ +#undef EXT4FS_DEBUG + +/* + * Define EXT4_RESERVATION to reserve data blocks for expanding files + */ +#define EXT4_DEFAULT_RESERVE_BLOCKS 8 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT4_MAX_RESERVE_BLOCKS 1027 +#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 +/* + * Always enable hashed directories + */ +#define CONFIG_EXT4_INDEX + +/* + * Debug code + */ +#ifdef EXT4FS_DEBUG +#define ext4_debug(f, a...) \ + do { \ + printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (KERN_DEBUG f, ## a); \ + } while (0) +#else +#define ext4_debug(f, a...) do {} while (0) +#endif + +/* + * Special inodes numbers + */ +#define EXT4_BAD_INO 1 /* Bad blocks inode */ +#define EXT4_ROOT_INO 2 /* Root inode */ +#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT4_JOURNAL_INO 8 /* Journal inode */ + +/* First non-reserved inode for old ext4 filesystems */ +#define EXT4_GOOD_OLD_FIRST_INO 11 + +/* + * Maximal count of links to a file + */ +#define EXT4_LINK_MAX 32000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT4_MIN_BLOCK_SIZE 1024 +#define EXT4_MAX_BLOCK_SIZE 4096 +#define EXT4_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) +#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) +#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) +#else +#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif + +/* + * Macro-instructions used to manage fragments + */ +#define EXT4_MIN_FRAG_SIZE 1024 +#define EXT4_MAX_FRAG_SIZE 4096 +#define EXT4_MIN_FRAG_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT4_FRAG_SIZE(s) (EXT4_SB(s)->s_frag_size) +# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_SB(s)->s_frags_per_block) +#else +# define EXT4_FRAG_SIZE(s) (EXT4_MIN_FRAG_SIZE << (s)->s_log_frag_size) +# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_FRAG_SIZE(s)) +#endif + +/* + * Structure of a blocks group descriptor + */ +struct ext4_group_desc +{ + __le32 bg_block_bitmap; /* Blocks bitmap block */ + __le32 bg_inode_bitmap; /* Inodes bitmap block */ + __le32 bg_inode_table; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ + __u16 bg_flags; + __u32 bg_reserved[3]; + __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ + __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ + __le32 bg_inode_table_hi; /* Inodes table block MSB */ +}; + +#ifdef __KERNEL__ +#include <linux/ext4_fs_i.h> +#include <linux/ext4_fs_sb.h> +#endif +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT4_MIN_DESC_SIZE 32 +#define EXT4_MIN_DESC_SIZE_64BIT 64 +#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE +#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) +#ifdef __KERNEL__ +# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) +# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) +# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) +#else +# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) +# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT4_NDIR_BLOCKS 12 +#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS +#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) +#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) +#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT4_UNRM_FL 0x00000002 /* Undelete */ +#define EXT4_COMPR_FL 0x00000004 /* Compress file */ +#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define EXT4_DIRTY_FL 0x00000100 +#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ +#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ +#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ + +#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + +/* + * Inode dynamic state flags + */ +#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ +#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ +#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ + +/* Used to pass group descriptor data when online resize is done */ +struct ext4_new_group_input { + __u32 group; /* Group number for this data */ + __u64 block_bitmap; /* Absolute block number of block bitmap */ + __u64 inode_bitmap; /* Absolute block number of inode bitmap */ + __u64 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; +}; + +/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ +struct ext4_new_group_data { + __u32 group; + __u64 block_bitmap; + __u64 inode_bitmap; + __u64 inode_table; + __u32 blocks_count; + __u16 reserved_blocks; + __u16 unused; + __u32 free_blocks_count; +}; + + +/* + * ioctl commands + */ +#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT4_IOC_GETVERSION _IOR('f', 3, long) +#define EXT4_IOC_SETVERSION _IOW('f', 4, long) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) +#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION +#ifdef CONFIG_JBD_DEBUG +#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +#endif +#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#ifdef CONFIG_JBD_DEBUG +#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#endif +#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + + +/* + * Mount options + */ +struct ext4_mount_options { + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned long s_commit_interval; +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; +#endif +}; + +/* + * Structure of an inode on the disk + */ +struct ext4_inode { + __le16 i_mode; /* File mode */ + __le16 i_uid; /* Low 16 bits of Owner Uid */ + __le32 i_size; /* Size in bytes */ + __le32 i_atime; /* Access time */ + __le32 i_ctime; /* Creation time */ + __le32 i_mtime; /* Modification time */ + __le32 i_dtime; /* Deletion Time */ + __le16 i_gid; /* Low 16 bits of Group Id */ + __le16 i_links_count; /* Links count */ + __le32 i_blocks; /* Blocks count */ + __le32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_reserved1; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ + __le32 i_generation; /* File version (for NFS) */ + __le32 i_file_acl; /* File ACL */ + __le32 i_dir_acl; /* Directory ACL */ + __le32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __le16 l_i_file_acl_high; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* Fragment number */ + __u8 m_i_fsize; /* Fragment size */ + __le16 m_i_file_acl_high; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ + __le16 i_extra_isize; + __le16 i_pad1; +}; + +#define i_size_high i_dir_acl + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_frag osd2.linux2.l_i_frag +#define i_fsize osd2.linux2.l_i_fsize +#define i_file_acl_high osd2.linux2.l_i_file_acl_high +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 + +#elif defined(__GNU__) + +#define i_translator osd1.hurd1.h_i_translator +#define i_frag osd2.hurd2.h_i_frag; +#define i_fsize osd2.hurd2.h_i_fsize; +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author + +#elif defined(__masix__) + +#define i_reserved1 osd1.masix1.m_i_reserved1 +#define i_frag osd2.masix2.m_i_frag +#define i_fsize osd2.masix2.m_i_fsize +#define i_file_acl_high osd2.masix2.m_i_file_acl_high +#define i_reserved2 osd2.masix2.m_i_reserved2 + +#endif /* defined(__KERNEL__) || defined(__linux__) */ + +/* + * File system states + */ +#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT4_ERROR_FS 0x0002 /* Errors detected */ +#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ + +/* + * Mount flags + */ +#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ +#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ +#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ +#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ +#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ +#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ + +/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ +#ifndef _LINUX_EXT2_FS_H +#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt +#define set_opt(o, opt) o |= EXT4_MOUNT_##opt +#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ + EXT4_MOUNT_##opt) +#else +#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD +#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT +#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS +#endif + +#define ext4_set_bit ext2_set_bit +#define ext4_set_bit_atomic ext2_set_bit_atomic +#define ext4_clear_bit ext2_clear_bit +#define ext4_clear_bit_atomic ext2_clear_bit_atomic +#define ext4_test_bit ext2_test_bit +#define ext4_find_first_zero_bit ext2_find_first_zero_bit +#define ext4_find_next_zero_bit ext2_find_next_zero_bit + +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT4_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT4_ERRORS_PANIC 3 /* Panic */ +#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext4_super_block { +/*00*/ __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count; /* Blocks count */ + __le32 s_r_blocks_count; /* Reserved blocks count */ + __le32 s_free_blocks_count; /* Free blocks count */ +/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_log_frag_size; /* Fragment size */ +/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ + __le32 s_frags_per_group; /* # Fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ +/*30*/ __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ +/*40*/ __le32 s_lastcheck; /* time of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ +/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT4_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ +/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ +/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ +/*78*/ char s_volume_name[16]; /* volume name */ +/*88*/ char s_last_mounted[64]; /* directory where last mounted */ +/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + /* + * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. + */ +/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ +/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ + __le32 s_journal_dev; /* device number of journal file */ + __le32 s_last_orphan; /* start of list of inodes to delete */ + __le32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __le16 s_desc_size; /* size of group descriptor */ +/*100*/ __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __u32 s_reserved[169]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} +static inline struct ext4_inode_info *EXT4_I(struct inode *inode) +{ + return container_of(inode, struct ext4_inode_info, vfs_inode); +} + +static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) +{ + return ino == EXT4_ROOT_INO || + ino == EXT4_JOURNAL_INO || + ino == EXT4_RESIZE_INO || + (ino >= EXT4_FIRST_INO(sb) && + ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); +} +#else +/* Assume that user mode programs are passing in an ext4fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT4_SB(sb) (sb) +#endif + +#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime + +/* + * Codes for operating systems + */ +#define EXT4_OS_LINUX 0 +#define EXT4_OS_HURD 1 +#define EXT4_OS_MASIX 2 +#define EXT4_OS_FREEBSD 3 +#define EXT4_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV +#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV + +#define EXT4_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 + +#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 + +#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ +#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ +#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 + +#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG| \ + EXT4_FEATURE_INCOMPAT_EXTENTS| \ + EXT4_FEATURE_INCOMPAT_64BIT) +#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT4_DEF_RESUID 0 +#define EXT4_DEF_RESGID 0 + +/* + * Default mount options + */ +#define EXT4_DEFM_DEBUG 0x0001 +#define EXT4_DEFM_BSDGROUPS 0x0002 +#define EXT4_DEFM_XATTR_USER 0x0004 +#define EXT4_DEFM_ACL 0x0008 +#define EXT4_DEFM_UID16 0x0010 +#define EXT4_DEFM_JMODE 0x0060 +#define EXT4_DEFM_JMODE_DATA 0x0020 +#define EXT4_DEFM_JMODE_ORDERED 0x0040 +#define EXT4_DEFM_JMODE_WBACK 0x0060 + +/* + * Structure of a directory entry + */ +#define EXT4_NAME_LEN 255 + +struct ext4_dir_entry { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __le16 name_len; /* Name length */ + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT4 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext4_dir_entry_2 { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * Ext4 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +#define EXT4_FT_UNKNOWN 0 +#define EXT4_FT_REG_FILE 1 +#define EXT4_FT_DIR 2 +#define EXT4_FT_CHRDEV 3 +#define EXT4_FT_BLKDEV 4 +#define EXT4_FT_FIFO 5 +#define EXT4_FT_SOCK 6 +#define EXT4_FT_SYMLINK 7 + +#define EXT4_FT_MAX 8 + +/* + * EXT4_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT4_DIR_PAD 4 +#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) +#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ + ~EXT4_DIR_ROUND) +/* + * Hash Tree Directory indexing + * (c) Daniel Phillips, 2001 + */ + +#ifdef CONFIG_EXT4_INDEX + #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ + EXT4_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) +#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) +#else + #define is_dx(dir) 0 +#define EXT4_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) +#endif + +/* Legal values for the dx_root hash_version field: */ + +#define DX_HASH_LEGACY 0 +#define DX_HASH_HALF_MD4 1 +#define DX_HASH_TEA 2 + +#ifdef __KERNEL__ + +/* hash info structure used by the directory hash */ +struct dx_hash_info +{ + u32 hash; + u32 minor_hash; + int hash_version; + u32 *seed; +}; + +#define EXT4_HTREE_EOF 0x7fffffff + +/* + * Control parameters used by ext4_htree_next_block + */ +#define HASH_NB_ALWAYS 1 + + +/* + * Describe an inode's exact location on disk and in memory + */ +struct ext4_iloc +{ + struct buffer_head *bh; + unsigned long offset; + unsigned long block_group; +}; + +static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) +{ + return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); +} + +/* + * This structure is stuffed into the struct file's private_data field + * for directories. It is where we put information so that we can do + * readdir operations in hash tree order. + */ +struct dir_private_info { + struct rb_root root; + struct rb_node *curr_node; + struct fname *extra_fname; + loff_t last_pos; + __u32 curr_hash; + __u32 curr_minor_hash; + __u32 next_hash; +}; + +/* calculate the first block number of the group */ +static inline ext4_fsblk_t +ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) +{ + return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); +} + +/* + * Special error return code only used by dx_probe() and its callers. + */ +#define ERR_BAD_DX_DIR -75000 + +void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, + unsigned long *blockgrpp, ext4_grpblk_t *offsetp); + +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext4 source programs needs to include it so they are duplicated here. + */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* balloc.c */ +extern unsigned int ext4_block_group(struct super_block *sb, + ext4_fsblk_t blocknr); +extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, + ext4_fsblk_t blocknr); +extern int ext4_bg_has_super(struct super_block *sb, int group); +extern unsigned long ext4_bg_num_gdb(struct super_block *sb, int group); +extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp); +extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern void ext4_free_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count); +extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks); +extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); +extern void ext4_check_blocks_bitmap (struct super_block *); +extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh); +extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext4_init_block_alloc_info(struct inode *); +extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); + +/* dir.c */ +extern int ext4_check_dir_entry(const char *, struct inode *, + struct ext4_dir_entry_2 *, + struct buffer_head *, unsigned long); +extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, + __u32 minor_hash, + struct ext4_dir_entry_2 *dirent); +extern void ext4_htree_free_dir_info(struct dir_private_info *p); + +/* fsync.c */ +extern int ext4_sync_file (struct file *, struct dentry *, int); + +/* hash.c */ +extern int ext4fs_dirhash(const char *name, int len, struct + dx_hash_info *hinfo); + +/* ialloc.c */ +extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); +extern void ext4_free_inode (handle_t *, struct inode *); +extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); +extern unsigned long ext4_count_free_inodes (struct super_block *); +extern unsigned long ext4_count_dirs (struct super_block *); +extern void ext4_check_inodes_bitmap (struct super_block *); +extern unsigned long ext4_count_free (struct buffer_head *, unsigned); + + +/* inode.c */ +int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t blocknr); +struct buffer_head * ext4_getblk (handle_t *, struct inode *, long, int, int *); +struct buffer_head * ext4_bread (handle_t *, struct inode *, int, int, int *); +int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, + sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, + int create, int extend_disksize); + +extern void ext4_read_inode (struct inode *); +extern int ext4_write_inode (struct inode *, int); +extern int ext4_setattr (struct dentry *, struct iattr *); +extern void ext4_delete_inode (struct inode *); +extern int ext4_sync_inode (handle_t *, struct inode *); +extern void ext4_discard_reservation (struct inode *); +extern void ext4_dirty_inode(struct inode *); +extern int ext4_change_inode_journal_flag(struct inode *, int); +extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); +extern void ext4_truncate (struct inode *); +extern void ext4_set_inode_flags(struct inode *); +extern void ext4_set_aops(struct inode *inode); +extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from); + +/* ioctl.c */ +extern int ext4_ioctl (struct inode *, struct file *, unsigned int, + unsigned long); +extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); + +/* namei.c */ +extern int ext4_orphan_add(handle_t *, struct inode *); +extern int ext4_orphan_del(handle_t *, struct inode *); +extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash); + +/* resize.c */ +extern int ext4_group_add(struct super_block *sb, + struct ext4_new_group_data *input); +extern int ext4_group_extend(struct super_block *sb, + struct ext4_super_block *es, + ext4_fsblk_t n_blocks_count); + +/* super.c */ +extern void ext4_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void __ext4_std_error (struct super_block *, const char *, int); +extern void ext4_abort (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_warning (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_update_dynamic_rev (struct super_block *sb); +extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, + struct ext4_group_desc *bg); +extern void ext4_block_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_table_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); + +static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | + le32_to_cpu(es->s_blocks_count); +} + +static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | + le32_to_cpu(es->s_r_blocks_count); +} + +static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | + le32_to_cpu(es->s_free_blocks_count); +} + +static inline void ext4_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_blocks_count = cpu_to_le32((u32)blk); + es->s_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_free_blocks_count = cpu_to_le32((u32)blk); + es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_r_blocks_count = cpu_to_le32((u32)blk); + es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); +} + + + +#define ext4_std_error(sb, errno) \ +do { \ + if ((errno)) \ + __ext4_std_error((sb), __FUNCTION__, (errno)); \ +} while (0) + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern const struct file_operations ext4_dir_operations; + +/* file.c */ +extern struct inode_operations ext4_file_inode_operations; +extern const struct file_operations ext4_file_operations; + +/* namei.c */ +extern struct inode_operations ext4_dir_inode_operations; +extern struct inode_operations ext4_special_inode_operations; + +/* symlink.c */ +extern struct inode_operations ext4_symlink_inode_operations; +extern struct inode_operations ext4_fast_symlink_inode_operations; + +/* extents.c */ +extern int ext4_ext_tree_init(handle_t *handle, struct inode *); +extern int ext4_ext_writepage_trans_blocks(struct inode *, int); +extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize); +extern void ext4_ext_truncate(struct inode *, struct page *); +extern void ext4_ext_init(struct super_block *); +extern void ext4_ext_release(struct super_block *); +static inline int +ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, + unsigned long max_blocks, struct buffer_head *bh, + int create, int extend_disksize) +{ + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_get_blocks(handle, inode, block, max_blocks, + bh, create, extend_disksize); + return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, + create, extend_disksize); +} + + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_EXT4_FS_H */ diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h new file mode 100644 index 000000000000..a41cc24568ca --- /dev/null +++ b/include/linux/ext4_fs_extents.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas <alex@clusterfs.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +#ifndef _LINUX_EXT4_EXTENTS +#define _LINUX_EXT4_EXTENTS + +#include <linux/ext4_fs.h> + +/* + * With AGRESSIVE_TEST defined, the capacity of index/leaf blocks + * becomes very small, so index split, in-depth growing and + * other hard changes happen much more often. + * This is for debug purposes only. + */ +#define AGRESSIVE_TEST_ + +/* + * With EXTENTS_STATS defined, the number of blocks and extents + * are collected in the truncate path. They'll be shown at + * umount time. + */ +#define EXTENTS_STATS__ + +/* + * If CHECK_BINSEARCH is defined, then the results of the binary search + * will also be checked by linear search. + */ +#define CHECK_BINSEARCH__ + +/* + * If EXT_DEBUG is defined you can use the 'extdebug' mount option + * to get lots of info about what's going on. + */ +#define EXT_DEBUG__ +#ifdef EXT_DEBUG +#define ext_debug(a...) printk(a) +#else +#define ext_debug(a...) +#endif + +/* + * If EXT_STATS is defined then stats numbers are collected. + * These number will be displayed at umount time. + */ +#define EXT_STATS_ + + +/* + * ext4_inode has i_block array (60 bytes total). + * The first 12 bytes store ext4_extent_header; + * the remainder stores an array of ext4_extent. + */ + +/* + * This is the extent on-disk structure. + * It's used at the bottom of the tree. + */ +struct ext4_extent { + __le32 ee_block; /* first logical block extent covers */ + __le16 ee_len; /* number of blocks covered by extent */ + __le16 ee_start_hi; /* high 16 bits of physical block */ + __le32 ee_start; /* low 32 bits of physical block */ +}; + +/* + * This is index on-disk structure. + * It's used at all the levels except the bottom. + */ +struct ext4_extent_idx { + __le32 ei_block; /* index covers logical blocks from 'block' */ + __le32 ei_leaf; /* pointer to the physical block of the next * + * level. leaf or next index could be there */ + __le16 ei_leaf_hi; /* high 16 bits of physical block */ + __u16 ei_unused; +}; + +/* + * Each block (leaves and indexes), even inode-stored has header. + */ +struct ext4_extent_header { + __le16 eh_magic; /* probably will support different formats */ + __le16 eh_entries; /* number of valid entries */ + __le16 eh_max; /* capacity of store in entries */ + __le16 eh_depth; /* has tree real underlying blocks? */ + __le32 eh_generation; /* generation of the tree */ +}; + +#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) + +/* + * Array of ext4_ext_path contains path to some extent. + * Creation/lookup routines use it for traversal/splitting/etc. + * Truncate uses it to simulate recursive walking. + */ +struct ext4_ext_path { + ext4_fsblk_t p_block; + __u16 p_depth; + struct ext4_extent *p_ext; + struct ext4_extent_idx *p_idx; + struct ext4_extent_header *p_hdr; + struct buffer_head *p_bh; +}; + +/* + * structure for external API + */ + +#define EXT4_EXT_CACHE_NO 0 +#define EXT4_EXT_CACHE_GAP 1 +#define EXT4_EXT_CACHE_EXTENT 2 + +/* + * to be called by ext4_ext_walk_space() + * negative retcode - error + * positive retcode - signal for ext4_ext_walk_space(), see below + * callback must return valid extent (passed or newly created) + */ +typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, + struct ext4_ext_cache *, + void *); + +#define EXT_CONTINUE 0 +#define EXT_BREAK 1 +#define EXT_REPEAT 2 + + +#define EXT_MAX_BLOCK 0xffffffff + +#define EXT_MAX_LEN ((1UL << 15) - 1) + + +#define EXT_FIRST_EXTENT(__hdr__) \ + ((struct ext4_extent *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_FIRST_INDEX(__hdr__) \ + ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_HAS_FREE_INDEX(__path__) \ + (le16_to_cpu((__path__)->p_hdr->eh_entries) \ + < le16_to_cpu((__path__)->p_hdr->eh_max)) +#define EXT_LAST_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_LAST_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_MAX_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) + +static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) +{ + return (struct ext4_extent_header *) EXT4_I(inode)->i_data; +} + +static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) +{ + return (struct ext4_extent_header *) bh->b_data; +} + +static inline unsigned short ext_depth(struct inode *inode) +{ + return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); +} + +static inline void ext4_ext_tree_changed(struct inode *inode) +{ + EXT4_I(inode)->i_ext_generation++; +} + +static inline void +ext4_ext_invalidate_cache(struct inode *inode) +{ + EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; +} + +extern int ext4_extent_tree_init(handle_t *, struct inode *); +extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); +extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); +extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *); + +#endif /* _LINUX_EXT4_EXTENTS */ + diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h new file mode 100644 index 000000000000..bb42379cb7fd --- /dev/null +++ b/include/linux/ext4_fs_i.h @@ -0,0 +1,158 @@ +/* + * linux/include/linux/ext4_fs_i.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_i.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT4_FS_I +#define _LINUX_EXT4_FS_I + +#include <linux/rwsem.h> +#include <linux/rbtree.h> +#include <linux/seqlock.h> +#include <linux/mutex.h> + +/* data type for block offset of block group */ +typedef int ext4_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long long ext4_fsblk_t; + +struct ext4_reserve_window { + ext4_fsblk_t _rsv_start; /* First byte reserved */ + ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +}; + +struct ext4_reserve_window_node { + struct rb_node rsv_node; + __u32 rsv_goal_size; + __u32 rsv_alloc_hit; + struct ext4_reserve_window rsv_window; +}; + +struct ext4_block_alloc_info { + /* information about reservation window */ + struct ext4_reserve_window_node rsv_window_node; + /* + * was i_next_alloc_block in ext4_inode_info + * is the logical (file-relative) number of the + * most-recently-allocated block in this file. + * We use this for detecting linearly ascending allocation requests. + */ + __u32 last_alloc_logical_block; + /* + * Was i_next_alloc_goal in ext4_inode_info + * is the *physical* companion to i_next_alloc_block. + * it the the physical block number of the block which was most-recentl + * allocated to this file. This give us the goal (target) for the next + * allocation when we detect linearly ascending requests. + */ + ext4_fsblk_t last_alloc_physical_block; +}; + +#define rsv_start rsv_window._rsv_start +#define rsv_end rsv_window._rsv_end + +/* + * storage for cached extent + */ +struct ext4_ext_cache { + ext4_fsblk_t ec_start; + __u32 ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; +}; + +/* + * third extended file system inode data in memory + */ +struct ext4_inode_info { + __le32 i_data[15]; /* unconverted */ + __u32 i_flags; +#ifdef EXT4_FRAGMENTS + __u32 i_faddr; + __u8 i_frag_no; + __u8 i_frag_size; +#endif + ext4_fsblk_t i_file_acl; + __u32 i_dir_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is ued for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + __u32 i_block_group; + __u32 i_state; /* Dynamic state flags for ext4 */ + + /* block reservation info */ + struct ext4_block_alloc_info *i_block_alloc_info; + + __u32 i_dir_start_lookup; +#ifdef CONFIG_EXT4DEV_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_mutex even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + + /* + * i_disksize keeps track of what the inode size is ON DISK, not + * in memory. During truncate, i_size is set to the new size by + * the VFS prior to calling ext4_truncate(), but the filesystem won't + * set i_disksize to 0 until the truncate is actually under way. + * + * The intent is that i_disksize always represents the blocks which + * are used by this file. This allows recovery to restart truncate + * on orphans if we crash during truncate. We actually write i_disksize + * into the on-disk inode when writing inodes out, instead of i_size. + * + * The only time when i_disksize and i_size may be different is when + * a truncate is in progress. The only things which change i_disksize + * are ext4_get_block (growth) and ext4_truncate (shrinkth). + */ + loff_t i_disksize; + + /* on-disk additional length */ + __u16 i_extra_isize; + + /* + * truncate_mutex is for serialising ext4_truncate() against + * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's + * data tree are chopped off during truncate. We can't do that in + * ext4 because whenever we perform intermediate commits during + * truncate, the inode and all the metadata blocks *must* be in a + * consistent state which allows truncation of the orphans to restart + * during recovery. Hence we must fix the get_block-vs-truncate race + * by other means, so we have truncate_mutex. + */ + struct mutex truncate_mutex; + struct inode vfs_inode; + + unsigned long i_ext_generation; + struct ext4_ext_cache i_cached_extent; +}; + +#endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h new file mode 100644 index 000000000000..691a713139ce --- /dev/null +++ b/include/linux/ext4_fs_sb.h @@ -0,0 +1,94 @@ +/* + * linux/include/linux/ext4_fs_sb.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_sb.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT4_FS_SB +#define _LINUX_EXT4_FS_SB + +#ifdef __KERNEL__ +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/blockgroup_lock.h> +#include <linux/percpu_counter.h> +#endif +#include <linux/rbtree.h> + +/* + * third extended-fs super-block data in memory + */ +struct ext4_sb_info { + unsigned long s_frag_size; /* Size of a fragment in bytes */ + unsigned long s_desc_size; /* Size of a group descriptor in bytes */ + unsigned long s_frags_per_block;/* Number of fragments per block */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_frags_per_group;/* Number of fragments in a group */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + unsigned long s_groups_count; /* Number of groups in the fs */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct blockgroup_lock s_blockgroup_lock; + + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; + struct ext4_reserve_window_node s_rsv_window_head; + + /* Journaling */ + struct inode * s_journal_inode; + struct journal_s * s_journal; + struct list_head s_orphan; + unsigned long s_commit_interval; + struct block_device *journal_bdev; +#ifdef CONFIG_JBD_DEBUG + struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ +#endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif + +#ifdef EXTENTS_STATS + /* ext4 extents stats */ + unsigned long s_ext_min; + unsigned long s_ext_max; + unsigned long s_depth_max; + spinlock_t s_ext_stats_lock; + unsigned long s_ext_blocks; + unsigned long s_ext_extents; +#endif +}; + +#endif /* _LINUX_EXT4_FS_SB */ diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h new file mode 100644 index 000000000000..72dd631912e4 --- /dev/null +++ b/include/linux/ext4_jbd2.h @@ -0,0 +1,273 @@ +/* + * linux/include/linux/ext4_jbd2.h + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1998--1999 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Ext4-specific journaling extensions. + */ + +#ifndef _LINUX_EXT4_JBD_H +#define _LINUX_EXT4_JBD_H + +#include <linux/fs.h> +#include <linux/jbd2.h> +#include <linux/ext4_fs.h> + +#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) + +/* Define the number of blocks we need to account to a transaction to + * modify one block of data. + * + * We may have to touch one inode, one bitmap buffer, up to three + * indirection blocks, the group and superblock summaries, and the data + * block to complete the transaction. + * + * For extents-enabled fs we may have to allocate and modify up to + * 5 levels of tree + root which are stored in the inode. */ + +#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ + (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ + || test_opt(sb, EXTENTS) ? 27U : 8U) + +/* Extended attribute operations touch at most two data buffers, + * two bitmap buffers, and two group summaries, in addition to the inode + * and the superblock, which are already accounted for. */ + +#define EXT4_XATTR_TRANS_BLOCKS 6U + +/* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ + EXT4_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + +/* Delete operations potentially hit one directory's namespace plus an + * entire inode, plus arbitrary amounts of bitmap/indirection data. Be + * generous. We can grow the delete transaction later if necessary. */ + +#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) + +/* Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * write(2) and truncate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. */ + +#define EXT4_MAX_TRANS_DATA 64U + +/* We break up a large truncate or write transaction once the handle's + * buffer credits gets this low, we need either to extend the + * transaction or to start a new one. Reserve enough space here for + * inode, bitmap, superblock, group and indirection updates for at least + * one block, plus two quota updates. Quota allocations are not + * needed. */ + +#define EXT4_RESERVE_TRANS_BLOCKS 12U + +#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 + +#ifdef CONFIG_QUOTA +/* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only inode+data */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) +/* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) +#else +#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 +#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 +#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 +#endif + +int +ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, + struct ext4_iloc *iloc); + +/* + * On success, We end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. + */ + +int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext4_iloc *iloc); + +int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); + +/* + * Wrapper functions with which ext4 calls into JBD. The intent here is + * to allow these to be turned into appropriate stubs so ext4 can control + * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't + * been done yet. + */ + +void ext4_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); + +static inline int +__ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_get_undo_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_get_write_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline void +ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh) +{ + jbd2_journal_release_buffer(handle, bh); +} + +static inline int +__ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_forget(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh) +{ + int err = jbd2_journal_revoke(handle, blocknr, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_get_create_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_dirty_metadata(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + + +#define ext4_journal_get_undo_access(handle, bh) \ + __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_get_write_access(handle, bh) \ + __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_revoke(handle, blocknr, bh) \ + __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) +#define ext4_journal_get_create_access(handle, bh) \ + __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_dirty_metadata(handle, bh) \ + __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) +#define ext4_journal_forget(handle, bh) \ + __ext4_journal_forget(__FUNCTION__, (handle), (bh)) + +int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); + +handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); +int __ext4_journal_stop(const char *where, handle_t *handle); + +static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) +{ + return ext4_journal_start_sb(inode->i_sb, nblocks); +} + +#define ext4_journal_stop(handle) \ + __ext4_journal_stop(__FUNCTION__, (handle)) + +static inline handle_t *ext4_journal_current_handle(void) +{ + return journal_current_handle(); +} + +static inline int ext4_journal_extend(handle_t *handle, int nblocks) +{ + return jbd2_journal_extend(handle, nblocks); +} + +static inline int ext4_journal_restart(handle_t *handle, int nblocks) +{ + return jbd2_journal_restart(handle, nblocks); +} + +static inline int ext4_journal_blocks_per_page(struct inode *inode) +{ + return jbd2_journal_blocks_per_page(inode); +} + +static inline int ext4_journal_force_commit(journal_t *journal) +{ + return jbd2_journal_force_commit(journal); +} + +/* super.c */ +int ext4_force_commit(struct super_block *sb); + +static inline int ext4_should_journal_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 1; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + return 1; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 1; + return 0; +} + +static inline int ext4_should_order_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + return 1; + return 0; +} + +static inline int ext4_should_writeback_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + return 1; + return 0; +} + +#endif /* _LINUX_EXT4_JBD_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c25a38d8f600..5081d27bfa27 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -17,6 +17,7 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user * int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); +void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); int hugetlb_report_meminfo(char *); int hugetlb_report_node_meminfo(int, char *); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index ab2740832742..35cb38573583 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -44,7 +44,7 @@ struct vlan_ethhdr { unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_vlan_proto; /* Should always be 0x8100 */ __be16 h_vlan_TCI; /* Encapsulates priority and VLAN ID */ - unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ + __be16 h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; #include <linux/skbuff.h> diff --git a/include/linux/io.h b/include/linux/io.h index aa3f5af670b5..81877ea39309 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -18,6 +18,7 @@ #ifndef _LINUX_IO_H #define _LINUX_IO_H +#include <linux/types.h> #include <asm/io.h> #include <asm/page.h> @@ -27,4 +28,31 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count); int ioremap_page_range(unsigned long addr, unsigned long end, unsigned long phys_addr, pgprot_t prot); +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +static inline int check_signature(const volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + #endif /* _LINUX_IO_H */ diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h index de73a3289cc2..51e2b9fb6372 100644 --- a/include/linux/ioc4.h +++ b/include/linux/ioc4.h @@ -157,7 +157,7 @@ struct ioc4_driver_data { unsigned long idd_bar0; struct pci_dev *idd_pdev; const struct pci_device_id *idd_pci_id; - struct __iomem ioc4_misc_regs *idd_misc_regs; + struct ioc4_misc_regs __iomem *idd_misc_regs; unsigned long count_period; void *idd_serial_data; unsigned int idd_variant; diff --git a/include/linux/istallion.h b/include/linux/istallion.h index 1f996621bc9c..b55e2a035605 100644 --- a/include/linux/istallion.h +++ b/include/linux/istallion.h @@ -100,7 +100,7 @@ typedef struct stlibrd { unsigned int iobase; int iosize; unsigned long memaddr; - void *membase; + void __iomem *membase; int memsize; int pagesize; int hostoffset; @@ -113,7 +113,7 @@ typedef struct stlibrd { void (*enable)(struct stlibrd *brdp); void (*reenable)(struct stlibrd *brdp); void (*disable)(struct stlibrd *brdp); - char *(*getmemptr)(struct stlibrd *brdp, unsigned long offset, int line); + void __iomem *(*getmemptr)(struct stlibrd *brdp, unsigned long offset, int line); void (*intr)(struct stlibrd *brdp); void (*reset)(struct stlibrd *brdp); stliport_t *ports[STL_MAXPORTS]; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h new file mode 100644 index 000000000000..ddb128795781 --- /dev/null +++ b/include/linux/jbd2.h @@ -0,0 +1,1107 @@ +/* + * linux/include/linux/jbd2.h + * + * Written by Stephen C. Tweedie <sct@redhat.com> + * + * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Definitions for transaction data structures for the buffer cache + * filesystem journaling support. + */ + +#ifndef _LINUX_JBD_H +#define _LINUX_JBD_H + +/* Allow this file to be included directly into e2fsprogs */ +#ifndef __KERNEL__ +#include "jfs_compat.h" +#define JBD2_DEBUG +#define jfs_debug jbd_debug +#else + +#include <linux/types.h> +#include <linux/buffer_head.h> +#include <linux/journal-head.h> +#include <linux/stddef.h> +#include <linux/bit_spinlock.h> +#include <linux/mutex.h> +#include <linux/timer.h> + +#include <asm/semaphore.h> +#endif + +#define journal_oom_retry 1 + +/* + * Define JBD_PARANIOD_IOFAIL to cause a kernel BUG() if ext3 finds + * certain classes of error which can occur due to failed IOs. Under + * normal use we want ext3 to continue after such errors, because + * hardware _can_ fail, but for debugging purposes when running tests on + * known-good hardware we may want to trap these errors. + */ +#undef JBD_PARANOID_IOFAIL + +/* + * The default maximum commit age, in seconds. + */ +#define JBD_DEFAULT_MAX_COMMIT_AGE 5 + +#ifdef CONFIG_JBD_DEBUG +/* + * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal + * consistency checks. By default we don't do this unless + * CONFIG_JBD_DEBUG is on. + */ +#define JBD_EXPENSIVE_CHECKING +extern int jbd2_journal_enable_debug; + +#define jbd_debug(n, f, a...) \ + do { \ + if ((n) <= jbd2_journal_enable_debug) { \ + printk (KERN_DEBUG "(%s, %d): %s: ", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (f, ## a); \ + } \ + } while (0) +#else +#define jbd_debug(f, a...) /**/ +#endif + +extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry); +extern void * jbd2_slab_alloc(size_t size, gfp_t flags); +extern void jbd2_slab_free(void *ptr, size_t size); + +#define jbd_kmalloc(size, flags) \ + __jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) +#define jbd_rep_kmalloc(size, flags) \ + __jbd2_kmalloc(__FUNCTION__, (size), (flags), 1) + +#define JBD2_MIN_JOURNAL_BLOCKS 1024 + +#ifdef __KERNEL__ + +/** + * typedef handle_t - The handle_t type represents a single atomic update being performed by some process. + * + * All filesystem modifications made by the process go + * through this handle. Recursive operations (such as quota operations) + * are gathered into a single update. + * + * The buffer credits field is used to account for journaled buffers + * being modified by the running process. To ensure that there is + * enough log space for all outstanding operations, we need to limit the + * number of outstanding buffers possible at any time. When the + * operation completes, any buffer credits not used are credited back to + * the transaction, so that at all times we know how many buffers the + * outstanding updates on a transaction might possibly touch. + * + * This is an opaque datatype. + **/ +typedef struct handle_s handle_t; /* Atomic operation type */ + + +/** + * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem. + * + * journal_t is linked to from the fs superblock structure. + * + * We use the journal_t to keep track of all outstanding transaction + * activity on the filesystem, and to manage the state of the log + * writing process. + * + * This is an opaque datatype. + **/ +typedef struct journal_s journal_t; /* Journal control structure */ +#endif + +/* + * Internal structures used by the logging mechanism: + */ + +#define JBD2_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ + +/* + * On-disk structures + */ + +/* + * Descriptor block types: + */ + +#define JBD2_DESCRIPTOR_BLOCK 1 +#define JBD2_COMMIT_BLOCK 2 +#define JBD2_SUPERBLOCK_V1 3 +#define JBD2_SUPERBLOCK_V2 4 +#define JBD2_REVOKE_BLOCK 5 + +/* + * Standard header for all descriptor blocks: + */ +typedef struct journal_header_s +{ + __be32 h_magic; + __be32 h_blocktype; + __be32 h_sequence; +} journal_header_t; + + +/* + * The block tag: used to describe a single buffer in the journal. + * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this + * raw struct shouldn't be used for pointer math or sizeof() - use + * journal_tag_bytes(journal) instead to compute this. + */ +typedef struct journal_block_tag_s +{ + __be32 t_blocknr; /* The on-disk block number */ + __be32 t_flags; /* See below */ + __be32 t_blocknr_high; /* most-significant high 32bits. */ +} journal_block_tag_t; + +#define JBD_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) +#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t)) + +/* + * The revoke descriptor: used on disk to describe a series of blocks to + * be revoked from the log + */ +typedef struct jbd2_journal_revoke_header_s +{ + journal_header_t r_header; + __be32 r_count; /* Count of bytes used in the block */ +} jbd2_journal_revoke_header_t; + + +/* Definitions for the journal tag flags word: */ +#define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ +#define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */ +#define JBD2_FLAG_DELETED 4 /* block deleted by this transaction */ +#define JBD2_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ + + +/* + * The journal superblock. All fields are in big-endian byte order. + */ +typedef struct journal_superblock_s +{ +/* 0x0000 */ + journal_header_t s_header; + +/* 0x000C */ + /* Static information describing the journal */ + __be32 s_blocksize; /* journal device blocksize */ + __be32 s_maxlen; /* total blocks in journal file */ + __be32 s_first; /* first block of log information */ + +/* 0x0018 */ + /* Dynamic information describing the current state of the log */ + __be32 s_sequence; /* first commit ID expected in log */ + __be32 s_start; /* blocknr of start of log */ + +/* 0x0020 */ + /* Error value, as set by jbd2_journal_abort(). */ + __be32 s_errno; + +/* 0x0024 */ + /* Remaining fields are only valid in a version-2 superblock */ + __be32 s_feature_compat; /* compatible feature set */ + __be32 s_feature_incompat; /* incompatible feature set */ + __be32 s_feature_ro_compat; /* readonly-compatible feature set */ +/* 0x0030 */ + __u8 s_uuid[16]; /* 128-bit uuid for journal */ + +/* 0x0040 */ + __be32 s_nr_users; /* Nr of filesystems sharing log */ + + __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ + +/* 0x0048 */ + __be32 s_max_transaction; /* Limit of journal blocks per trans.*/ + __be32 s_max_trans_data; /* Limit of data blocks per trans. */ + +/* 0x0050 */ + __u32 s_padding[44]; + +/* 0x0100 */ + __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ +/* 0x0400 */ +} journal_superblock_t; + +#define JBD2_HAS_COMPAT_FEATURE(j,mask) \ + ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) +#define JBD2_HAS_RO_COMPAT_FEATURE(j,mask) \ + ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask)))) +#define JBD2_HAS_INCOMPAT_FEATURE(j,mask) \ + ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) + +#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 +#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 + +/* Features known to this kernel version: */ +#define JBD2_KNOWN_COMPAT_FEATURES 0 +#define JBD2_KNOWN_ROCOMPAT_FEATURES 0 +#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ + JBD2_FEATURE_INCOMPAT_64BIT) + +#ifdef __KERNEL__ + +#include <linux/fs.h> +#include <linux/sched.h> + +#define JBD_ASSERTIONS +#ifdef JBD_ASSERTIONS +#define J_ASSERT(assert) \ +do { \ + if (!(assert)) { \ + printk (KERN_EMERG \ + "Assertion failure in %s() at %s:%d: \"%s\"\n", \ + __FUNCTION__, __FILE__, __LINE__, # assert); \ + BUG(); \ + } \ +} while (0) + +#if defined(CONFIG_BUFFER_DEBUG) +void buffer_assertion_failure(struct buffer_head *bh); +#define J_ASSERT_BH(bh, expr) \ + do { \ + if (!(expr)) \ + buffer_assertion_failure(bh); \ + J_ASSERT(expr); \ + } while (0) +#define J_ASSERT_JH(jh, expr) J_ASSERT_BH(jh2bh(jh), expr) +#else +#define J_ASSERT_BH(bh, expr) J_ASSERT(expr) +#define J_ASSERT_JH(jh, expr) J_ASSERT(expr) +#endif + +#else +#define J_ASSERT(assert) do { } while (0) +#endif /* JBD_ASSERTIONS */ + +#if defined(JBD_PARANOID_IOFAIL) +#define J_EXPECT(expr, why...) J_ASSERT(expr) +#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) +#define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) +#else +#define __journal_expect(expr, why...) \ + ({ \ + int val = (expr); \ + if (!val) { \ + printk(KERN_ERR \ + "EXT3-fs unexpected failure: %s;\n",# expr); \ + printk(KERN_ERR why "\n"); \ + } \ + val; \ + }) +#define J_EXPECT(expr, why...) __journal_expect(expr, ## why) +#define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) +#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) +#endif + +enum jbd_state_bits { + BH_JBD /* Has an attached ext3 journal_head */ + = BH_PrivateStart, + BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ + BH_Freed, /* Has been freed (truncated) */ + BH_Revoked, /* Has been revoked from the log */ + BH_RevokeValid, /* Revoked flag is valid */ + BH_JBDDirty, /* Is dirty but journaled */ + BH_State, /* Pins most journal_head state */ + BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ + BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ +}; + +BUFFER_FNS(JBD, jbd) +BUFFER_FNS(JWrite, jwrite) +BUFFER_FNS(JBDDirty, jbddirty) +TAS_BUFFER_FNS(JBDDirty, jbddirty) +BUFFER_FNS(Revoked, revoked) +TAS_BUFFER_FNS(Revoked, revoked) +BUFFER_FNS(RevokeValid, revokevalid) +TAS_BUFFER_FNS(RevokeValid, revokevalid) +BUFFER_FNS(Freed, freed) + +static inline struct buffer_head *jh2bh(struct journal_head *jh) +{ + return jh->b_bh; +} + +static inline struct journal_head *bh2jh(struct buffer_head *bh) +{ + return bh->b_private; +} + +static inline void jbd_lock_bh_state(struct buffer_head *bh) +{ + bit_spin_lock(BH_State, &bh->b_state); +} + +static inline int jbd_trylock_bh_state(struct buffer_head *bh) +{ + return bit_spin_trylock(BH_State, &bh->b_state); +} + +static inline int jbd_is_locked_bh_state(struct buffer_head *bh) +{ + return bit_spin_is_locked(BH_State, &bh->b_state); +} + +static inline void jbd_unlock_bh_state(struct buffer_head *bh) +{ + bit_spin_unlock(BH_State, &bh->b_state); +} + +static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) +{ + bit_spin_lock(BH_JournalHead, &bh->b_state); +} + +static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) +{ + bit_spin_unlock(BH_JournalHead, &bh->b_state); +} + +struct jbd2_revoke_table_s; + +/** + * struct handle_s - The handle_s type is the concrete type associated with + * handle_t. + * @h_transaction: Which compound transaction is this update a part of? + * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. + * @h_ref: Reference count on this handle + * @h_err: Field for caller's use to track errors through large fs operations + * @h_sync: flag for sync-on-close + * @h_jdata: flag to force data journaling + * @h_aborted: flag indicating fatal error on handle + **/ + +/* Docbook can't yet cope with the bit fields, but will leave the documentation + * in so it can be fixed later. + */ + +struct handle_s +{ + /* Which compound transaction is this update a part of? */ + transaction_t *h_transaction; + + /* Number of remaining buffers we are allowed to dirty: */ + int h_buffer_credits; + + /* Reference count on this handle */ + int h_ref; + + /* Field for caller's use to track errors through large fs */ + /* operations */ + int h_err; + + /* Flags [no locking] */ + unsigned int h_sync: 1; /* sync-on-close */ + unsigned int h_jdata: 1; /* force data journaling */ + unsigned int h_aborted: 1; /* fatal error on handle */ +}; + + +/* The transaction_t type is the guts of the journaling mechanism. It + * tracks a compound transaction through its various states: + * + * RUNNING: accepting new updates + * LOCKED: Updates still running but we don't accept new ones + * RUNDOWN: Updates are tidying up but have finished requesting + * new buffers to modify (state not used for now) + * FLUSH: All updates complete, but we are still writing to disk + * COMMIT: All data on disk, writing commit record + * FINISHED: We still have to keep the transaction for checkpointing. + * + * The transaction keeps track of all of the buffers modified by a + * running transaction, and all of the buffers committed but not yet + * flushed to home for finished transactions. + */ + +/* + * Lock ranking: + * + * j_list_lock + * ->jbd_lock_bh_journal_head() (This is "innermost") + * + * j_state_lock + * ->jbd_lock_bh_state() + * + * jbd_lock_bh_state() + * ->j_list_lock + * + * j_state_lock + * ->t_handle_lock + * + * j_state_lock + * ->j_list_lock (journal_unmap_buffer) + * + */ + +struct transaction_s +{ + /* Pointer to the journal for this transaction. [no locking] */ + journal_t *t_journal; + + /* Sequence number for this transaction [no locking] */ + tid_t t_tid; + + /* + * Transaction's current state + * [no locking - only kjournald2 alters this] + * FIXME: needs barriers + * KLUDGE: [use j_state_lock] + */ + enum { + T_RUNNING, + T_LOCKED, + T_RUNDOWN, + T_FLUSH, + T_COMMIT, + T_FINISHED + } t_state; + + /* + * Where in the log does this transaction's commit start? [no locking] + */ + unsigned long t_log_start; + + /* Number of buffers on the t_buffers list [j_list_lock] */ + int t_nr_buffers; + + /* + * Doubly-linked circular list of all buffers reserved but not yet + * modified by this transaction [j_list_lock] + */ + struct journal_head *t_reserved_list; + + /* + * Doubly-linked circular list of all buffers under writeout during + * commit [j_list_lock] + */ + struct journal_head *t_locked_list; + + /* + * Doubly-linked circular list of all metadata buffers owned by this + * transaction [j_list_lock] + */ + struct journal_head *t_buffers; + + /* + * Doubly-linked circular list of all data buffers still to be + * flushed before this transaction can be committed [j_list_lock] + */ + struct journal_head *t_sync_datalist; + + /* + * Doubly-linked circular list of all forget buffers (superseded + * buffers which we can un-checkpoint once this transaction commits) + * [j_list_lock] + */ + struct journal_head *t_forget; + + /* + * Doubly-linked circular list of all buffers still to be flushed before + * this transaction can be checkpointed. [j_list_lock] + */ + struct journal_head *t_checkpoint_list; + + /* + * Doubly-linked circular list of all buffers submitted for IO while + * checkpointing. [j_list_lock] + */ + struct journal_head *t_checkpoint_io_list; + + /* + * Doubly-linked circular list of temporary buffers currently undergoing + * IO in the log [j_list_lock] + */ + struct journal_head *t_iobuf_list; + + /* + * Doubly-linked circular list of metadata buffers being shadowed by log + * IO. The IO buffers on the iobuf list and the shadow buffers on this + * list match each other one for one at all times. [j_list_lock] + */ + struct journal_head *t_shadow_list; + + /* + * Doubly-linked circular list of control buffers being written to the + * log. [j_list_lock] + */ + struct journal_head *t_log_list; + + /* + * Protects info related to handles + */ + spinlock_t t_handle_lock; + + /* + * Number of outstanding updates running on this transaction + * [t_handle_lock] + */ + int t_updates; + + /* + * Number of buffers reserved for use by all handles in this transaction + * handle but not yet modified. [t_handle_lock] + */ + int t_outstanding_credits; + + /* + * Forward and backward links for the circular list of all transactions + * awaiting checkpoint. [j_list_lock] + */ + transaction_t *t_cpnext, *t_cpprev; + + /* + * When will the transaction expire (become due for commit), in jiffies? + * [no locking] + */ + unsigned long t_expires; + + /* + * How many handles used this transaction? [t_handle_lock] + */ + int t_handle_count; + +}; + +/** + * struct journal_s - The journal_s type is the concrete type associated with + * journal_t. + * @j_flags: General journaling state flags + * @j_errno: Is there an outstanding uncleared error on the journal (from a + * prior abort)? + * @j_sb_buffer: First part of superblock buffer + * @j_superblock: Second part of superblock buffer + * @j_format_version: Version of the superblock format + * @j_state_lock: Protect the various scalars in the journal + * @j_barrier_count: Number of processes waiting to create a barrier lock + * @j_barrier: The barrier lock itself + * @j_running_transaction: The current running transaction.. + * @j_committing_transaction: the transaction we are pushing to disk + * @j_checkpoint_transactions: a linked circular list of all transactions + * waiting for checkpointing + * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction + * to start committing, or for a barrier lock to be released + * @j_wait_logspace: Wait queue for waiting for checkpointing to complete + * @j_wait_done_commit: Wait queue for waiting for commit to complete + * @j_wait_checkpoint: Wait queue to trigger checkpointing + * @j_wait_commit: Wait queue to trigger commit + * @j_wait_updates: Wait queue to wait for updates to complete + * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints + * @j_head: Journal head - identifies the first unused block in the journal + * @j_tail: Journal tail - identifies the oldest still-used block in the + * journal. + * @j_free: Journal free - how many free blocks are there in the journal? + * @j_first: The block number of the first usable block + * @j_last: The block number one beyond the last usable block + * @j_dev: Device where we store the journal + * @j_blocksize: blocksize for the location where we store the journal. + * @j_blk_offset: starting block offset for into the device where we store the + * journal + * @j_fs_dev: Device which holds the client fs. For internal journal this will + * be equal to j_dev + * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_list_lock: Protects the buffer lists and internal buffer state. + * @j_inode: Optional inode where we store the journal. If present, all journal + * block numbers are mapped into this inode via bmap(). + * @j_tail_sequence: Sequence number of the oldest transaction in the log + * @j_transaction_sequence: Sequence number of the next transaction to grant + * @j_commit_sequence: Sequence number of the most recently committed + * transaction + * @j_commit_request: Sequence number of the most recent transaction wanting + * commit + * @j_uuid: Uuid of client object. + * @j_task: Pointer to the current commit thread for this journal + * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a + * single compound commit transaction + * @j_commit_interval: What is the maximum transaction lifetime before we begin + * a commit? + * @j_commit_timer: The timer used to wakeup the commit thread + * @j_revoke_lock: Protect the revoke table + * @j_revoke: The revoke table - maintains the list of revoked blocks in the + * current transaction. + * @j_revoke_table: alternate revoke tables for j_revoke + * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction + * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the + * number that will fit in j_blocksize + * @j_last_sync_writer: most recent pid which did a synchronous write + * @j_private: An opaque pointer to fs-private information. + */ + +struct journal_s +{ + /* General journaling state flags [j_state_lock] */ + unsigned long j_flags; + + /* + * Is there an outstanding uncleared error on the journal (from a prior + * abort)? [j_state_lock] + */ + int j_errno; + + /* The superblock buffer */ + struct buffer_head *j_sb_buffer; + journal_superblock_t *j_superblock; + + /* Version of the superblock format */ + int j_format_version; + + /* + * Protect the various scalars in the journal + */ + spinlock_t j_state_lock; + + /* + * Number of processes waiting to create a barrier lock [j_state_lock] + */ + int j_barrier_count; + + /* The barrier lock itself */ + struct mutex j_barrier; + + /* + * Transactions: The current running transaction... + * [j_state_lock] [caller holding open handle] + */ + transaction_t *j_running_transaction; + + /* + * the transaction we are pushing to disk + * [j_state_lock] [caller holding open handle] + */ + transaction_t *j_committing_transaction; + + /* + * ... and a linked circular list of all transactions waiting for + * checkpointing. [j_list_lock] + */ + transaction_t *j_checkpoint_transactions; + + /* + * Wait queue for waiting for a locked transaction to start committing, + * or for a barrier lock to be released + */ + wait_queue_head_t j_wait_transaction_locked; + + /* Wait queue for waiting for checkpointing to complete */ + wait_queue_head_t j_wait_logspace; + + /* Wait queue for waiting for commit to complete */ + wait_queue_head_t j_wait_done_commit; + + /* Wait queue to trigger checkpointing */ + wait_queue_head_t j_wait_checkpoint; + + /* Wait queue to trigger commit */ + wait_queue_head_t j_wait_commit; + + /* Wait queue to wait for updates to complete */ + wait_queue_head_t j_wait_updates; + + /* Semaphore for locking against concurrent checkpoints */ + struct mutex j_checkpoint_mutex; + + /* + * Journal head: identifies the first unused block in the journal. + * [j_state_lock] + */ + unsigned long j_head; + + /* + * Journal tail: identifies the oldest still-used block in the journal. + * [j_state_lock] + */ + unsigned long j_tail; + + /* + * Journal free: how many free blocks are there in the journal? + * [j_state_lock] + */ + unsigned long j_free; + + /* + * Journal start and end: the block numbers of the first usable block + * and one beyond the last usable block in the journal. [j_state_lock] + */ + unsigned long j_first; + unsigned long j_last; + + /* + * Device, blocksize and starting block offset for the location where we + * store the journal. + */ + struct block_device *j_dev; + int j_blocksize; + unsigned long long j_blk_offset; + + /* + * Device which holds the client fs. For internal journal this will be + * equal to j_dev. + */ + struct block_device *j_fs_dev; + + /* Total maximum capacity of the journal region on disk. */ + unsigned int j_maxlen; + + /* + * Protects the buffer lists and internal buffer state. + */ + spinlock_t j_list_lock; + + /* Optional inode where we store the journal. If present, all */ + /* journal block numbers are mapped into this inode via */ + /* bmap(). */ + struct inode *j_inode; + + /* + * Sequence number of the oldest transaction in the log [j_state_lock] + */ + tid_t j_tail_sequence; + + /* + * Sequence number of the next transaction to grant [j_state_lock] + */ + tid_t j_transaction_sequence; + + /* + * Sequence number of the most recently committed transaction + * [j_state_lock]. + */ + tid_t j_commit_sequence; + + /* + * Sequence number of the most recent transaction wanting commit + * [j_state_lock] + */ + tid_t j_commit_request; + + /* + * Journal uuid: identifies the object (filesystem, LVM volume etc) + * backed by this journal. This will eventually be replaced by an array + * of uuids, allowing us to index multiple devices within a single + * journal and to perform atomic updates across them. + */ + __u8 j_uuid[16]; + + /* Pointer to the current commit thread for this journal */ + struct task_struct *j_task; + + /* + * Maximum number of metadata buffers to allow in a single compound + * commit transaction + */ + int j_max_transaction_buffers; + + /* + * What is the maximum transaction lifetime before we begin a commit? + */ + unsigned long j_commit_interval; + + /* The timer used to wakeup the commit thread: */ + struct timer_list j_commit_timer; + + /* + * The revoke table: maintains the list of revoked blocks in the + * current transaction. [j_revoke_lock] + */ + spinlock_t j_revoke_lock; + struct jbd2_revoke_table_s *j_revoke; + struct jbd2_revoke_table_s *j_revoke_table[2]; + + /* + * array of bhs for jbd2_journal_commit_transaction + */ + struct buffer_head **j_wbuf; + int j_wbufsize; + + pid_t j_last_sync_writer; + + /* + * An opaque pointer to fs-private information. ext3 puts its + * superblock pointer here + */ + void *j_private; +}; + +/* + * Journal flag definitions + */ +#define JBD2_UNMOUNT 0x001 /* Journal thread is being destroyed */ +#define JBD2_ABORT 0x002 /* Journaling has been aborted for errors. */ +#define JBD2_ACK_ERR 0x004 /* The errno in the sb has been acked */ +#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ +#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ +#define JBD2_BARRIER 0x020 /* Use IDE barriers */ + +/* + * Function declarations for the journaling transaction and buffer + * management + */ + +/* Filing buffers */ +extern void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); +extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); +extern void __jbd2_journal_unfile_buffer(struct journal_head *); +extern void __jbd2_journal_refile_buffer(struct journal_head *); +extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); +extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); +extern void __journal_free_buffer(struct journal_head *bh); +extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); +extern void __journal_clean_data_list(transaction_t *transaction); + +/* Log buffer allocation */ +extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); +int jbd2_journal_next_log_block(journal_t *, unsigned long long *); + +/* Commit management */ +extern void jbd2_journal_commit_transaction(journal_t *); + +/* Checkpoint list management */ +int __jbd2_journal_clean_checkpoint_list(journal_t *journal); +int __jbd2_journal_remove_checkpoint(struct journal_head *); +void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); + +/* Buffer IO */ +extern int +jbd2_journal_write_metadata_buffer(transaction_t *transaction, + struct journal_head *jh_in, + struct journal_head **jh_out, + unsigned long long blocknr); + +/* Transaction locking */ +extern void __wait_on_journal (journal_t *); + +/* + * Journal locking. + * + * We need to lock the journal during transaction state changes so that nobody + * ever tries to take a handle on the running transaction while we are in the + * middle of moving it to the commit phase. j_state_lock does this. + * + * Note that the locking is completely interrupt unsafe. We never touch + * journal structures from interrupts. + */ + +static inline handle_t *journal_current_handle(void) +{ + return current->journal_info; +} + +/* The journaling code user interface: + * + * Create and destroy handles + * Register buffer modifications against the current transaction. + */ + +extern handle_t *jbd2_journal_start(journal_t *, int nblocks); +extern int jbd2_journal_restart (handle_t *, int nblocks); +extern int jbd2_journal_extend (handle_t *, int nblocks); +extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); +extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); +extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); +extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *); +extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); +extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); +extern int jbd2_journal_forget (handle_t *, struct buffer_head *); +extern void journal_sync_buffer (struct buffer_head *); +extern void jbd2_journal_invalidatepage(journal_t *, + struct page *, unsigned long); +extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); +extern int jbd2_journal_stop(handle_t *); +extern int jbd2_journal_flush (journal_t *); +extern void jbd2_journal_lock_updates (journal_t *); +extern void jbd2_journal_unlock_updates (journal_t *); + +extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, + struct block_device *fs_dev, + unsigned long long start, int len, int bsize); +extern journal_t * jbd2_journal_init_inode (struct inode *); +extern int jbd2_journal_update_format (journal_t *); +extern int jbd2_journal_check_used_features + (journal_t *, unsigned long, unsigned long, unsigned long); +extern int jbd2_journal_check_available_features + (journal_t *, unsigned long, unsigned long, unsigned long); +extern int jbd2_journal_set_features + (journal_t *, unsigned long, unsigned long, unsigned long); +extern int jbd2_journal_create (journal_t *); +extern int jbd2_journal_load (journal_t *journal); +extern void jbd2_journal_destroy (journal_t *); +extern int jbd2_journal_recover (journal_t *journal); +extern int jbd2_journal_wipe (journal_t *, int); +extern int jbd2_journal_skip_recovery (journal_t *); +extern void jbd2_journal_update_superblock (journal_t *, int); +extern void __jbd2_journal_abort_hard (journal_t *); +extern void jbd2_journal_abort (journal_t *, int); +extern int jbd2_journal_errno (journal_t *); +extern void jbd2_journal_ack_err (journal_t *); +extern int jbd2_journal_clear_err (journal_t *); +extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); +extern int jbd2_journal_force_commit(journal_t *); + +/* + * journal_head management + */ +struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); +struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); +void jbd2_journal_remove_journal_head(struct buffer_head *bh); +void jbd2_journal_put_journal_head(struct journal_head *jh); + +/* + * handle management + */ +extern kmem_cache_t *jbd2_handle_cache; + +static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) +{ + return kmem_cache_alloc(jbd2_handle_cache, gfp_flags); +} + +static inline void jbd_free_handle(handle_t *handle) +{ + kmem_cache_free(jbd2_handle_cache, handle); +} + +/* Primary revoke support */ +#define JOURNAL_REVOKE_DEFAULT_HASH 256 +extern int jbd2_journal_init_revoke(journal_t *, int); +extern void jbd2_journal_destroy_revoke_caches(void); +extern int jbd2_journal_init_revoke_caches(void); + +extern void jbd2_journal_destroy_revoke(journal_t *); +extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); +extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); +extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); + +/* Recovery revoke support */ +extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); +extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); +extern void jbd2_journal_clear_revoke(journal_t *); +extern void jbd2_journal_switch_revoke_table(journal_t *journal); + +/* + * The log thread user interface: + * + * Request space in the current transaction, and force transaction commit + * transitions on demand. + */ + +int __jbd2_log_space_left(journal_t *); /* Called with journal locked */ +int jbd2_log_start_commit(journal_t *journal, tid_t tid); +int __jbd2_log_start_commit(journal_t *journal, tid_t tid); +int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); +int jbd2_journal_force_commit_nested(journal_t *journal); +int jbd2_log_wait_commit(journal_t *journal, tid_t tid); +int jbd2_log_do_checkpoint(journal_t *journal); + +void __jbd2_log_wait_for_space(journal_t *journal); +extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); +extern int jbd2_cleanup_journal_tail(journal_t *); + +/* Debugging code only: */ + +#define jbd_ENOSYS() \ +do { \ + printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \ + current->state = TASK_UNINTERRUPTIBLE; \ + schedule(); \ +} while (1) + +/* + * is_journal_abort + * + * Simple test wrapper function to test the JBD2_ABORT state flag. This + * bit, when set, indicates that we have had a fatal error somewhere, + * either inside the journaling layer or indicated to us by the client + * (eg. ext3), and that we and should not commit any further + * transactions. + */ + +static inline int is_journal_aborted(journal_t *journal) +{ + return journal->j_flags & JBD2_ABORT; +} + +static inline int is_handle_aborted(handle_t *handle) +{ + if (handle->h_aborted) + return 1; + return is_journal_aborted(handle->h_transaction->t_journal); +} + +static inline void jbd2_journal_abort_handle(handle_t *handle) +{ + handle->h_aborted = 1; +} + +#endif /* __KERNEL__ */ + +/* Comparison functions for transaction IDs: perform comparisons using + * modulo arithmetic so that they work over sequence number wraps. */ + +static inline int tid_gt(tid_t x, tid_t y) +{ + int difference = (x - y); + return (difference > 0); +} + +static inline int tid_geq(tid_t x, tid_t y) +{ + int difference = (x - y); + return (difference >= 0); +} + +extern int jbd2_journal_blocks_per_page(struct inode *inode); +extern size_t journal_tag_bytes(journal_t *journal); + +/* + * Return the minimum number of blocks which must be free in the journal + * before a new transaction may be started. Must be called under j_state_lock. + */ +static inline int jbd_space_needed(journal_t *journal) +{ + int nblocks = journal->j_max_transaction_buffers; + if (journal->j_committing_transaction) + nblocks += journal->j_committing_transaction-> + t_outstanding_credits; + return nblocks; +} + +/* + * Definitions which augment the buffer_head layer + */ + +/* journaling buffer types */ +#define BJ_None 0 /* Not journaled */ +#define BJ_SyncData 1 /* Normal data: flush before commit */ +#define BJ_Metadata 2 /* Normal journaled metadata */ +#define BJ_Forget 3 /* Buffer superseded by this transaction */ +#define BJ_IO 4 /* Buffer is for temporary IO use */ +#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */ +#define BJ_LogCtl 6 /* Buffer contains log descriptors */ +#define BJ_Reserved 7 /* Buffer is reserved for access by journal */ +#define BJ_Locked 8 /* Locked for I/O during commit */ +#define BJ_Types 9 + +extern int jbd_blocks_per_page(struct inode *inode); + +#ifdef __KERNEL__ + +#define buffer_trace_init(bh) do {} while (0) +#define print_buffer_fields(bh) do {} while (0) +#define print_buffer_trace(bh) do {} while (0) +#define BUFFER_TRACE(bh, info) do {} while (0) +#define BUFFER_TRACE2(bh, bh2, info) do {} while (0) +#define JBUFFER_TRACE(jh, info) do {} while (0) + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_JBD_H */ diff --git a/include/linux/magic.h b/include/linux/magic.h index 22036dd2ba36..156c40fc664e 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -8,6 +8,7 @@ #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 +#define EXT4_SUPER_MAGIC 0xEF53 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 diff --git a/include/linux/mm.h b/include/linux/mm.h index b7966ab8cb6a..5a6068ff5556 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -593,6 +593,7 @@ static inline int page_mapped(struct page *page) */ #define NOPAGE_SIGBUS (NULL) #define NOPAGE_OOM ((struct page *) (-1)) +#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */ /* * Error return values for the *_nopfn functions @@ -1102,12 +1103,7 @@ static inline void vm_stat_account(struct mm_struct *mm, #ifndef CONFIG_DEBUG_PAGEALLOC static inline void -kernel_map_pages(struct page *page, int numpages, int enable) -{ - if (!PageHighMem(page) && !enable) - debug_check_no_locks_freed(page_address(page), - numpages * PAGE_SIZE); -} +kernel_map_pages(struct page *page, int numpages, int enable) {} #endif extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); diff --git a/include/linux/mmc/protocol.h b/include/linux/mmc/protocol.h index 81c3f77f652c..08dec8d9e703 100644 --- a/include/linux/mmc/protocol.h +++ b/include/linux/mmc/protocol.h @@ -83,6 +83,7 @@ /* Application commands */ #define SD_APP_SET_BUS_WIDTH 6 /* ac [1:0] bus width R1 */ +#define SD_APP_SEND_NUM_WR_BLKS 22 /* adtc R1 */ #define SD_APP_OP_COND 41 /* bcr [31:0] OCR R3 */ #define SD_APP_SEND_SCR 51 /* adtc R1 */ diff --git a/include/linux/module.h b/include/linux/module.h index 4b2d8091a410..d1d00ce8f4ed 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -317,9 +317,6 @@ struct module /* Am I unsafe to unload? */ int unsafe; - /* Am I GPL-compatible */ - int license_gplok; - unsigned int taints; /* same bits as kernel:tainted */ #ifdef CONFIG_MODULE_UNLOAD diff --git a/include/linux/nbd.h b/include/linux/nbd.h index e712e7d47cc2..d6b6dc09ad97 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -15,6 +15,8 @@ #ifndef LINUX_NBD_H #define LINUX_NBD_H +#include <linux/types.h> + #define NBD_SET_SOCK _IO( 0xab, 0 ) #define NBD_SET_BLKSIZE _IO( 0xab, 1 ) #define NBD_SET_SIZE _IO( 0xab, 2 ) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 5dce5c21822c..b1063e9cdb1b 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -8,8 +8,8 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * - * For details of nodemask_scnprintf() and nodemask_parse(), - * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c. + * For details of nodemask_scnprintf() and nodemask_parse_user(), + * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. * For details of nodelist_scnprintf() and nodelist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. @@ -51,7 +51,7 @@ * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing - * int nodemask_parse(ubuf, ulen, mask) Parse ascii string as nodemask + * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) @@ -288,12 +288,12 @@ static inline int __nodemask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } -#define nodemask_parse(ubuf, ulen, dst) \ - __nodemask_parse((ubuf), (ulen), &(dst), MAX_NUMNODES) -static inline int __nodemask_parse(const char __user *buf, int len, +#define nodemask_parse_user(ubuf, ulen, dst) \ + __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) +static inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { - return bitmap_parse(buf, len, dstp->bits, nbits); + return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_scnprintf(buf, len, src) \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 46ec72fa2c84..600e3d387ffc 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -19,7 +19,7 @@ * we force a syntax error here if it isn't. */ #define get_cpu_var(var) (*({ \ - extern int simple_indentifier_##var(void); \ + extern int simple_identifier_##var(void); \ preempt_disable(); \ &__get_cpu_var(var); })) #define put_cpu_var(var) preempt_enable() diff --git a/include/linux/sched.h b/include/linux/sched.h index 331f4502e92b..6735c1cf334c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1065,9 +1065,10 @@ static inline int pid_alive(struct task_struct *p) } /** - * is_init - check if a task structure is the first user space - * task the kernel created. - * @p: Task structure to be checked. + * is_init - check if a task structure is init + * @tsk: Task structure to be checked. + * + * Check if a task structure is the first user space task the kernel created. */ static inline int is_init(struct task_struct *tsk) { diff --git a/include/linux/security.h b/include/linux/security.h index 9b5fea81f55e..b200b9856f32 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -882,7 +882,8 @@ struct request_sock; * Check permission when a flow selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. - * Return 0 if permission is granted. + * Return 0 if permission is granted, -ESRCH otherwise, or -errno + * on other errors. * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. @@ -891,6 +892,7 @@ struct request_sock; * @xfrm_flow_state_match: * @fl contains the flow key to match. * @xfrm points to the xfrm_state to match. + * @xp points to the xfrm_policy to match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. @@ -1388,7 +1390,8 @@ struct security_operations { int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); - int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); + int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp); int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -3120,11 +3123,6 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); } -static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) -{ - return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); -} - static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return security_ops->xfrm_policy_clone_security(old, new); @@ -3175,9 +3173,10 @@ static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, return security_ops->xfrm_state_pol_flow_match(x, xp, fl); } -static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +static inline int security_xfrm_flow_state_match(struct flowi *fl, + struct xfrm_state *xfrm, struct xfrm_policy *xp) { - return security_ops->xfrm_flow_state_match(fl, xfrm); + return security_ops->xfrm_flow_state_match(fl, xfrm, xp); } static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) @@ -3197,11 +3196,6 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } -static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) -{ - return 0; -} - static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; @@ -3249,7 +3243,7 @@ static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, } static inline int security_xfrm_flow_state_match(struct flowi *fl, - struct xfrm_state *xfrm) + struct xfrm_state *xfrm, struct xfrm_policy *xp) { return 1; } diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h index 367d6c3e8ed4..13b3af547864 100644 --- a/include/linux/smb_fs.h +++ b/include/linux/smb_fs.h @@ -43,17 +43,17 @@ static inline struct smb_inode_info *SMB_I(struct inode *inode) /* macro names are short for word, double-word, long value (?) */ #define WVAL(buf,pos) \ - (le16_to_cpu(get_unaligned((u16 *)((u8 *)(buf) + (pos))))) + (le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos))))) #define DVAL(buf,pos) \ - (le32_to_cpu(get_unaligned((u32 *)((u8 *)(buf) + (pos))))) + (le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos))))) #define LVAL(buf,pos) \ - (le64_to_cpu(get_unaligned((u64 *)((u8 *)(buf) + (pos))))) + (le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos))))) #define WSET(buf,pos,val) \ - put_unaligned(cpu_to_le16((u16)(val)), (u16 *)((u8 *)(buf) + (pos))) + put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos))) #define DSET(buf,pos,val) \ - put_unaligned(cpu_to_le32((u32)(val)), (u32 *)((u8 *)(buf) + (pos))) + put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos))) #define LSET(buf,pos,val) \ - put_unaligned(cpu_to_le64((u64)(val)), (u64 *)((u8 *)(buf) + (pos))) + put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos))) /* where to find the base of the SMB packet proper */ #define smb_base(buf) ((u8 *)(((u8 *)(buf))+4)) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d6288e89fd9d..9c9a8ad92477 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -57,7 +57,8 @@ struct svc_serv { struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; unsigned int sv_nrthreads; /* # of server threads */ - unsigned int sv_bufsz; /* datagram buffer size */ + unsigned int sv_max_payload; /* datagram payload size */ + unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ struct list_head sv_permsocks; /* all permanent sockets */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3efcfc7e9c6c..1912c6cbef55 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -431,6 +431,10 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event); asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, int timeout); +asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); asmlinkage long sys_gethostname(char __user *name, int len); asmlinkage long sys_sethostname(char __user *name, int len); asmlinkage long sys_setdomainname(char __user *name, int len); @@ -593,7 +597,7 @@ asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, unsigned int flags); asmlinkage long sys_get_robust_list(int pid, - struct robust_list_head __user **head_ptr, + struct robust_list_head __user * __user *head_ptr, size_t __user *len_ptr); asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); diff --git a/include/linux/tifm.h b/include/linux/tifm.h index 203dd5e11ecb..dfb8052eee5e 100644 --- a/include/linux/tifm.h +++ b/include/linux/tifm.h @@ -17,6 +17,7 @@ #include <linux/wait.h> #include <linux/delay.h> #include <linux/pci.h> +#include <linux/scatterlist.h> /* Host registers (relative to pci base address): */ enum { diff --git a/include/linux/timex.h b/include/linux/timex.h index 049dfe4a11f2..db501dc23c29 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -293,6 +293,9 @@ extern void second_overflow(void); extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); +/* Don't use! Compatibility define for existing users. */ +#define tickadj (500/HZ ? : 1) + #endif /* KERNEL */ #endif /* LINUX_TIMEX_H */ diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index fc62887c5206..61eef508b041 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -351,6 +351,14 @@ struct ufs2_csum_total { __fs64 cs_spare[3]; /* future expansion */ }; +struct ufs_csum_core { + __u64 cs_ndir; /* number of directories */ + __u64 cs_nbfree; /* number of free blocks */ + __u64 cs_nifree; /* number of free inodes */ + __u64 cs_nffree; /* number of free frags */ + __u64 cs_numclusters; /* number of free clusters */ +}; + /* * File system flags */ @@ -715,7 +723,7 @@ struct ufs_cg_private_info { struct ufs_sb_private_info { struct ufs_buffer_head s_ubh; /* buffer containing super block */ - struct ufs2_csum_total cs_total; + struct ufs_csum_core cs_total; __u32 s_sblkno; /* offset of super-blocks in filesys */ __u32 s_cblkno; /* offset of cg-block in filesys */ __u32 s_iblkno; /* offset of inode-blocks in filesys */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index cda8a96e2fa0..0e7f1e20ea45 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -41,6 +41,7 @@ struct xattr_handler { }; ssize_t vfs_getxattr(struct dentry *, char *, void *, size_t); +ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int vfs_setxattr(struct dentry *, char *, void *, size_t, int); int vfs_removexattr(struct dentry *, char *); diff --git a/include/net/flow.h b/include/net/flow.h index ddf5f3ca1720..3b44d72b27d3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -97,7 +97,7 @@ struct flowi { #define FLOW_DIR_FWD 2 struct sock; -typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, +typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, void **objp, atomic_t **obj_refp); extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6d14c22a00c5..5f48748fe017 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -196,6 +196,7 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { struct module *owner = tw->tw_prot->owner; + twsk_destructor((struct sock *)tw); #ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); diff --git a/include/net/netlabel.h b/include/net/netlabel.h index c63a58058e21..113337c27955 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -34,6 +34,7 @@ #include <linux/net.h> #include <linux/skbuff.h> #include <net/netlink.h> +#include <asm/atomic.h> /* * NetLabel - A management interface for maintaining network packet label @@ -106,6 +107,7 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); /* LSM security attributes */ struct netlbl_lsm_cache { + atomic_t refcount; void (*free) (const void *data); void *data; }; @@ -117,7 +119,7 @@ struct netlbl_lsm_secattr { unsigned char *mls_cat; size_t mls_cat_len; - struct netlbl_lsm_cache cache; + struct netlbl_lsm_cache *cache; }; /* @@ -126,6 +128,43 @@ struct netlbl_lsm_secattr { /** + * netlbl_secattr_cache_alloc - Allocate and initialize a secattr cache + * @flags: the memory allocation flags + * + * Description: + * Allocate and initialize a netlbl_lsm_cache structure. Returns a pointer + * on success, NULL on failure. + * + */ +static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(int flags) +{ + struct netlbl_lsm_cache *cache; + + cache = kzalloc(sizeof(*cache), flags); + if (cache) + atomic_set(&cache->refcount, 1); + return cache; +} + +/** + * netlbl_secattr_cache_free - Frees a netlbl_lsm_cache struct + * @cache: the struct to free + * + * Description: + * Frees @secattr including all of the internal buffers. + * + */ +static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) +{ + if (!atomic_dec_and_test(&cache->refcount)) + return; + + if (cache->free) + cache->free(cache->data); + kfree(cache); +} + +/** * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct * @secattr: the struct to initialize * @@ -143,20 +182,16 @@ static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) /** * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct * @secattr: the struct to clear - * @clear_cache: cache clear flag * * Description: * Destroys the @secattr struct, including freeing all of the internal buffers. - * If @clear_cache is true then free the cache fields, otherwise leave them - * intact. The struct must be reset with a call to netlbl_secattr_init() - * before reuse. + * The struct must be reset with a call to netlbl_secattr_init() before reuse. * */ -static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr, - u32 clear_cache) +static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr) { - if (clear_cache && secattr->cache.data != NULL && secattr->cache.free) - secattr->cache.free(secattr->cache.data); + if (secattr->cache) + netlbl_secattr_cache_free(secattr->cache); kfree(secattr->domain); kfree(secattr->mls_cat); } @@ -178,17 +213,14 @@ static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags) /** * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct * @secattr: the struct to free - * @clear_cache: cache clear flag * * Description: - * Frees @secattr including all of the internal buffers. If @clear_cache is - * true then free the cache fields, otherwise leave them intact. + * Frees @secattr including all of the internal buffers. * */ -static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, - u32 clear_cache) +static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr) { - netlbl_secattr_destroy(secattr, clear_cache); + netlbl_secattr_destroy(secattr); kfree(secattr); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ee68a3124076..764e3af5be93 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -139,6 +139,7 @@ int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); +void sctp_sock_rfree(struct sk_buff *skb); /* * sctp/primitive.c @@ -444,6 +445,19 @@ static inline struct list_head *sctp_list_dequeue(struct list_head *list) return result; } +/* SCTP version of skb_set_owner_r. We need this one because + * of the way we have to do receive buffer accounting on bundled + * chunks. + */ +static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ + struct sctp_ulpevent *event = sctp_skb2event(skb); + + skb->sk = sk; + skb->destructor = sctp_sock_rfree; + atomic_add(event->rmem_len, &sk->sk_rmem_alloc); +} + /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 6c40cfc4832d..1a4ddc1ec7d2 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -63,6 +63,7 @@ struct sctp_ulpevent { __u32 cumtsn; int msg_flags; int iif; + unsigned int rmem_len; }; /* Retrieve the skb this event sits inside of. */ diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 2544281e1d5e..be293d795e38 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -19,6 +19,7 @@ struct timewait_sock_ops { unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); + void (*twsk_destructor)(struct sock *sk); }; static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -28,4 +29,10 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } +static inline void twsk_destructor(struct sock *sk) +{ + if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) + sk->sk_prot->twsk_prot->twsk_destructor(sk); +} + #endif /* _TIMEWAIT_SOCK_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1e2a4ddec96e..737fdb2ee8a4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -995,7 +995,8 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); +extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, + struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; diff --git a/include/sound/core.h b/include/sound/core.h index b056ea925ecf..fa1ca0127bab 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -89,10 +89,10 @@ struct snd_device { struct snd_monitor_file { struct file *file; struct snd_monitor_file *next; + const struct file_operations *disconnected_f_op; + struct list_head shutdown_list; }; -struct snd_shutdown_f_ops; /* define it later in init.c */ - /* main structure for soundcard */ struct snd_card { diff --git a/include/sound/version.h b/include/sound/version.h index 2ee849d0e198..4ad86eb6440b 100644 --- a/include/sound/version.h +++ b/include/sound/version.h @@ -1,3 +1,3 @@ -/* include/version.h. Generated by configure. */ -#define CONFIG_SND_VERSION "1.0.12rc1" -#define CONFIG_SND_DATE " (Thu Jun 22 13:55:50 2006 UTC)" +/* include/version.h. Generated by alsa/ksync script. */ +#define CONFIG_SND_VERSION "1.0.13" +#define CONFIG_SND_DATE " (Fri Oct 06 18:28:19 2006 UTC)" diff --git a/kernel/audit.c b/kernel/audit.c index f9889ee77825..98106f6078b0 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -340,7 +340,7 @@ static int kauditd_thread(void *dummy) { struct sk_buff *skb; - while (1) { + while (!kthread_should_stop()) { skb = skb_dequeue(&audit_skb_queue); wake_up(&audit_backlog_wait); if (skb) { @@ -369,6 +369,7 @@ static int kauditd_thread(void *dummy) remove_wait_queue(&kauditd_wait, &wait); } } + return 0; } int audit_send_list(void *_dest) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9d850ae13b1b..6313c38c930e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2137,7 +2137,7 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, * See also the previous routine cpuset_handle_cpuhp(). */ -void cpuset_track_online_nodes() +void cpuset_track_online_nodes(void) { common_cpu_mem_hotplug_unplug(); } diff --git a/kernel/futex.c b/kernel/futex.c index 4aaf91951a43..b364e0026191 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1612,10 +1612,10 @@ sys_set_robust_list(struct robust_list_head __user *head, * @len_ptr: pointer to a length field, the kernel fills in the header size */ asmlinkage long -sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, +sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, size_t __user *len_ptr) { - struct robust_list_head *head; + struct robust_list_head __user *head; unsigned long ret; if (!pid) @@ -1694,14 +1694,15 @@ retry: * Fetch a robust-list pointer. Bit 0 signals PI futexes: */ static inline int fetch_robust_entry(struct robust_list __user **entry, - struct robust_list __user **head, int *pi) + struct robust_list __user * __user *head, + int *pi) { unsigned long uentry; - if (get_user(uentry, (unsigned long *)head)) + if (get_user(uentry, (unsigned long __user *)head)) return -EFAULT; - *entry = (void *)(uentry & ~1UL); + *entry = (void __user *)(uentry & ~1UL); *pi = uentry & 1; return 0; @@ -1739,7 +1740,7 @@ void exit_robust_list(struct task_struct *curr) return; if (pending) - handle_futex_death((void *)pending + futex_offset, curr, pip); + handle_futex_death((void __user *)pending + futex_offset, curr, pip); while (entry != &head->list) { /* @@ -1747,7 +1748,7 @@ void exit_robust_list(struct task_struct *curr) * don't process it twice: */ if (entry != pending) - if (handle_futex_death((void *)entry + futex_offset, + if (handle_futex_death((void __user *)entry + futex_offset, curr, pi)) return; /* diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index c5cca3f65cb7..50f24eea6cd0 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -18,7 +18,7 @@ */ static inline int fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, - compat_uptr_t *head, int *pi) + compat_uptr_t __user *head, int *pi) { if (get_user(*uentry, head)) return -EFAULT; @@ -62,7 +62,7 @@ void compat_exit_robust_list(struct task_struct *curr) &head->list_op_pending, &pip)) return; if (upending) - handle_futex_death((void *)pending + futex_offset, curr, pip); + handle_futex_death((void __user *)pending + futex_offset, curr, pip); while (compat_ptr(uentry) != &head->list) { /* @@ -70,7 +70,7 @@ void compat_exit_robust_list(struct task_struct *curr) * dont process it twice: */ if (entry != pending) - if (handle_futex_death((void *)entry + futex_offset, + if (handle_futex_death((void __user *)entry + futex_offset, curr, pi)) return; @@ -78,7 +78,7 @@ void compat_exit_robust_list(struct task_struct *curr) * Fetch the next entry in the list: */ if (fetch_robust_entry(&uentry, &entry, - (compat_uptr_t *)&entry->next, &pi)) + (compat_uptr_t __user *)&entry->next, &pi)) return; /* * Avoid excessively long or circular lists: @@ -103,10 +103,10 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head, } asmlinkage long -compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, +compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, compat_size_t __user *len_ptr) { - struct compat_robust_list_head *head; + struct compat_robust_list_head __user *head; unsigned long ret; if (!pid) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 607c7809ad01..9a352667007c 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -57,7 +57,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, if (!irq_desc[irq].chip->set_affinity || no_irq_affinity) return -EIO; - err = cpumask_parse(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 35f10f7ff94a..5bfeaed7e487 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg) clear_bit(irq, irqs_resend); desc = irq_desc + irq; local_irq_disable(); - desc->handle_irq(irq, desc, NULL); + desc->handle_irq(irq, desc); local_irq_enable(); } } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 4c0553461000..805a322a5655 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1114,8 +1114,6 @@ static int count_matching_names(struct lock_class *new_class) return count + 1; } -extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void); - /* * Register a lock's class in the hash-table, if the class is not present * yet. Otherwise we look it up. We cache the result in the lock object @@ -1153,8 +1151,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) * (or spin_lock_init()) call - which acts as the key. For static * locks we use the lock object itself as the key. */ - if (sizeof(struct lock_class_key) > sizeof(struct lock_class)) - __error_too_big_MAX_LOCKDEP_SUBCLASSES(); + BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class)); key = lock->key->subkeys + subclass; diff --git a/kernel/module.c b/kernel/module.c index 7f60e782de1e..67009bd56c52 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -87,6 +87,12 @@ static inline int strong_try_module_get(struct module *mod) return try_module_get(mod); } +static inline void add_taint_module(struct module *mod, unsigned flag) +{ + add_taint(flag); + mod->taints |= flag; +} + /* A thread that wants to hold a reference to a module only while it * is running can call ths to safely exit. * nfsd and lockd use this. @@ -847,12 +853,10 @@ static int check_version(Elf_Shdr *sechdrs, return 0; } /* Not in module's version table. OK, but that taints the kernel. */ - if (!(tainted & TAINT_FORCED_MODULE)) { + if (!(tainted & TAINT_FORCED_MODULE)) printk("%s: no version for \"%s\" found: kernel tainted.\n", mod->name, symname); - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; - } + add_taint_module(mod, TAINT_FORCED_MODULE); return 1; } @@ -910,7 +914,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, unsigned long ret; const unsigned long *crc; - ret = __find_symbol(name, &owner, &crc, mod->license_gplok); + ret = __find_symbol(name, &owner, &crc, + !(mod->taints & TAINT_PROPRIETARY_MODULE)); if (ret) { /* use_module can fail due to OOM, or module unloading */ if (!check_version(sechdrs, versindex, name, mod, crc) || @@ -1335,12 +1340,11 @@ static void set_license(struct module *mod, const char *license) if (!license) license = "unspecified"; - mod->license_gplok = license_is_gpl_compatible(license); - if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) { - printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", - mod->name, license); - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; + if (!license_is_gpl_compatible(license)) { + if (!(tainted & TAINT_PROPRIETARY_MODULE)) + printk(KERN_WARNING "%s: module license '%s' taints" + "kernel.\n", mod->name, license); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); } } @@ -1619,8 +1623,7 @@ static struct module *load_module(void __user *umod, modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; + add_taint_module(mod, TAINT_FORCED_MODULE); printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", mod->name); } else if (!same_magic(modmagic, vermagic)) { @@ -1714,14 +1717,10 @@ static struct module *load_module(void __user *umod, /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); - if (strcmp(mod->name, "ndiswrapper") == 0) { - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; - } - if (strcmp(mod->name, "driverloader") == 0) { - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; - } + if (strcmp(mod->name, "ndiswrapper") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + if (strcmp(mod->name, "driverloader") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, sechdrs, infoindex); @@ -1766,8 +1765,7 @@ static struct module *load_module(void __user *umod, (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { printk(KERN_WARNING "%s: No versions for exported symbols." " Tainting kernel.\n", mod->name); - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; + add_taint_module(mod, TAINT_FORCED_MODULE); } #endif @@ -2132,9 +2130,33 @@ static void m_stop(struct seq_file *m, void *p) mutex_unlock(&module_mutex); } +static char *taint_flags(unsigned int taints, char *buf) +{ + int bx = 0; + + if (taints) { + buf[bx++] = '('; + if (taints & TAINT_PROPRIETARY_MODULE) + buf[bx++] = 'P'; + if (taints & TAINT_FORCED_MODULE) + buf[bx++] = 'F'; + /* + * TAINT_FORCED_RMMOD: could be added. + * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't + * apply to modules. + */ + buf[bx++] = ')'; + } + buf[bx] = '\0'; + + return buf; +} + static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); + char buf[8]; + seq_printf(m, "%s %lu", mod->name, mod->init_size + mod->core_size); print_unload_info(m, mod); @@ -2147,6 +2169,10 @@ static int m_show(struct seq_file *m, void *p) /* Used by oprofile and other similar tools. */ seq_printf(m, " 0x%p", mod->module_core); + /* Taints info */ + if (mod->taints) + seq_printf(m, " %s", taint_flags(mod->taints, buf)); + seq_printf(m, "\n"); return 0; } @@ -2235,28 +2261,6 @@ struct module *module_text_address(unsigned long addr) return mod; } -static char *taint_flags(unsigned int taints, char *buf) -{ - *buf = '\0'; - if (taints) { - int bx; - - buf[0] = '('; - bx = 1; - if (taints & TAINT_PROPRIETARY_MODULE) - buf[bx++] = 'P'; - if (taints & TAINT_FORCED_MODULE) - buf[bx++] = 'F'; - /* - * TAINT_FORCED_RMMOD: could be added. - * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't - * apply to modules. - */ - buf[bx] = ')'; - } - return buf; -} - /* Don't grab lock, we're oopsing. */ void print_modules(void) { diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d72234942798..d3a158a60312 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/pm.h> +#include <linux/console.h> #include <linux/cpu.h> #include "power.h" @@ -119,8 +120,10 @@ int pm_suspend_disk(void) if (error) return error; + suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) { + resume_console(); printk("Some devices failed to suspend\n"); unprepare_processes(); return error; @@ -133,6 +136,7 @@ int pm_suspend_disk(void) if (in_suspend) { device_resume(); + resume_console(); pr_debug("PM: writing image.\n"); error = swsusp_write(); if (!error) @@ -148,6 +152,7 @@ int pm_suspend_disk(void) swsusp_free(); Done: device_resume(); + resume_console(); unprepare_processes(); return error; } @@ -212,7 +217,9 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); + suspend_console(); if ((error = device_suspend(PMSG_PRETHAW))) { + resume_console(); printk("Some devices failed to suspend\n"); swsusp_free(); goto Thaw; @@ -224,6 +231,7 @@ static int software_resume(void) swsusp_resume(); pr_debug("PM: Restore failed, recovering.n"); device_resume(); + resume_console(); Thaw: unprepare_processes(); Done: diff --git a/kernel/power/user.c b/kernel/power/user.c index 72825c853cd7..d991d3b0e5a4 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -19,6 +19,7 @@ #include <linux/swapops.h> #include <linux/pm.h> #include <linux/fs.h> +#include <linux/console.h> #include <linux/cpu.h> #include <asm/uaccess.h> @@ -145,10 +146,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = freeze_processes(); if (error) { thaw_processes(); + enable_nonboot_cpus(); error = -EBUSY; } } - enable_nonboot_cpus(); up(&pm_sem); if (!error) data->frozen = 1; @@ -173,12 +174,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (!error) { + suspend_console(); error = device_suspend(PMSG_FREEZE); if (!error) { in_suspend = 1; error = swsusp_suspend(); device_resume(); } + resume_console(); } up(&pm_sem); if (!error) @@ -196,11 +199,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); + suspend_console(); error = device_suspend(PMSG_PRETHAW); if (!error) { error = swsusp_resume(); device_resume(); } + resume_console(); pm_restore_console(); up(&pm_sem); break; @@ -289,6 +294,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, } /* Put devices to sleep */ + suspend_console(); error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "Failed to suspend some devices.\n"); @@ -299,7 +305,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, /* Wake up devices */ device_resume(); } - + resume_console(); if (pm_ops->finish) pm_ops->finish(PM_SUSPEND_MEM); diff --git a/kernel/printk.c b/kernel/printk.c index 771f5e861bcd..f7d427ef5038 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -820,15 +820,8 @@ void release_console_sem(void) console_locked = 0; up(&console_sem); spin_unlock_irqrestore(&logbuf_lock, flags); - if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) { - /* - * If we printk from within the lock dependency code, - * from within the scheduler code, then do not lock - * up due to self-recursion: - */ - if (!lockdep_internal()) - wake_up_interruptible(&log_wait); - } + if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) + wake_up_interruptible(&log_wait); } EXPORT_SYMBOL(release_console_sem); diff --git a/kernel/profile.c b/kernel/profile.c index 857300a2afec..f940b462eec9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -399,7 +399,7 @@ static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffe unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/kernel/relay.c b/kernel/relay.c index 1d63ecddfa70..f04bbdb56ac2 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -887,7 +887,7 @@ static int subbuf_read_actor(size_t read_start, from = buf->start + read_start; ret = avail; - if (copy_to_user(desc->arg.data, from, avail)) { + if (copy_to_user(desc->arg.buf, from, avail)) { desc->error = -EFAULT; ret = 0; } @@ -946,24 +946,17 @@ typedef int (*subbuf_actor_t) (size_t read_start, */ static inline ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, - size_t count, subbuf_actor_t subbuf_actor, read_actor_t actor, - void *target) + read_descriptor_t *desc) { struct rchan_buf *buf = filp->private_data; size_t read_start, avail; - read_descriptor_t desc; int ret; - if (!count) + if (!desc->count) return 0; - desc.written = 0; - desc.count = count; - desc.arg.data = target; - desc.error = 0; - mutex_lock(&filp->f_dentry->d_inode->i_mutex); do { if (!relay_file_read_avail(buf, *ppos)) @@ -974,19 +967,19 @@ static inline ssize_t relay_file_read_subbufs(struct file *filp, if (!avail) break; - avail = min(desc.count, avail); - ret = subbuf_actor(read_start, buf, avail, &desc, actor); - if (desc.error < 0) + avail = min(desc->count, avail); + ret = subbuf_actor(read_start, buf, avail, desc, actor); + if (desc->error < 0) break; if (ret) { relay_file_read_consume(buf, read_start, ret); *ppos = relay_file_read_end_pos(buf, read_start, ret); } - } while (desc.count && ret); + } while (desc->count && ret); mutex_unlock(&filp->f_dentry->d_inode->i_mutex); - return desc.written; + return desc->written; } static ssize_t relay_file_read(struct file *filp, @@ -994,8 +987,13 @@ static ssize_t relay_file_read(struct file *filp, size_t count, loff_t *ppos) { - return relay_file_read_subbufs(filp, ppos, count, subbuf_read_actor, - NULL, buffer); + read_descriptor_t desc; + desc.written = 0; + desc.count = count; + desc.arg.buf = buffer; + desc.error = 0; + return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, + NULL, &desc); } static ssize_t relay_file_sendfile(struct file *filp, @@ -1004,8 +1002,13 @@ static ssize_t relay_file_sendfile(struct file *filp, read_actor_t actor, void *target) { - return relay_file_read_subbufs(filp, ppos, count, subbuf_send_actor, - actor, target); + read_descriptor_t desc; + desc.written = 0; + desc.count = count; + desc.arg.data = target; + desc.error = 0; + return relay_file_read_subbufs(filp, ppos, subbuf_send_actor, + actor, &desc); } struct file_operations relay_file_operations = { diff --git a/kernel/sched.c b/kernel/sched.c index 53608a59d6e3..094b5687eef6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1822,14 +1822,14 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; - if (unlikely(!mm)) { + if (!mm) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next); - if (unlikely(!prev->mm)) { + if (!prev->mm) { prev->active_mm = NULL; WARN_ON(rq->prev_mm); rq->prev_mm = oldmm; @@ -3491,7 +3491,7 @@ asmlinkage void __sched preempt_schedule(void) * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. */ - if (unlikely(ti->preempt_count || irqs_disabled())) + if (likely(ti->preempt_count || irqs_disabled())) return; need_resched: diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cfc737bffe6d..3df9bfc7ff78 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -28,6 +28,7 @@ #include <linux/notifier.h> #include <linux/kthread.h> #include <linux/hardirq.h> +#include <linux/mempolicy.h> /* * The per-CPU workqueue (if single thread, we always use the first @@ -245,6 +246,12 @@ static int worker_thread(void *__cwq) sigprocmask(SIG_BLOCK, &blocked, NULL); flush_signals(current); + /* + * We inherited MPOL_INTERLEAVE from the booting kernel. + * Set MPOL_DEFAULT to insure node local allocations. + */ + numa_default_policy(); + /* SIG_IGN makes children autoreap: see do_notify_parent(). */ sa.sa.sa_handler = SIG_IGN; sa.sa.sa_flags = 0; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 756a908c441d..8fd2dbf7eb5b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -71,7 +71,7 @@ config LOG_BUF_SHIFT config DETECT_SOFTLOCKUP bool "Detect Soft Lockups" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !S390 default y help Say Y here to enable the kernel to detect "soft lockups", @@ -371,6 +371,19 @@ config FORCED_INLINING become the default in the future, until then this option is there to test gcc for this. +config HEADERS_CHECK + bool "Run 'make headers_check' when building vmlinux" + help + This option will extract the user-visible kernel headers whenever + building the kernel, and will run basic sanity checks on them to + ensure that exported files do not attempt to include files which + were not exported, etc. + + If you're making modifications to header files which are + relevant for userspace, say 'Y', and check the headers + exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in + your build tree), to make sure they're suitable. + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index 8e6662bb9c37..59070dbfbeb4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -5,7 +5,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o irq_regs.o + sha1.o irq_regs.o carta_random32.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/bitmap.c b/lib/bitmap.c index d71e38c54ea5..037fa9aa2ed7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -316,10 +316,11 @@ int bitmap_scnprintf(char *buf, unsigned int buflen, EXPORT_SYMBOL(bitmap_scnprintf); /** - * bitmap_parse - convert an ASCII hex string into a bitmap. - * @ubuf: pointer to buffer in user space containing string. - * @ubuflen: buffer size in bytes. If string is smaller than this + * __bitmap_parse - convert an ASCII hex string into a bitmap. + * @buf: pointer to buffer containing string. + * @buflen: buffer size in bytes. If string is smaller than this * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space * @maskp: pointer to bitmap array that will contain result. * @nmaskbits: size of bitmap, in bits. * @@ -330,11 +331,13 @@ EXPORT_SYMBOL(bitmap_scnprintf); * characters and for grouping errors such as "1,,5", ",44", "," and "". * Leading and trailing whitespace accepted, but not embedded whitespace. */ -int bitmap_parse(const char __user *ubuf, unsigned int ubuflen, - unsigned long *maskp, int nmaskbits) +int __bitmap_parse(const char *buf, unsigned int buflen, + int is_user, unsigned long *maskp, + int nmaskbits) { int c, old_c, totaldigits, ndigits, nchunks, nbits; u32 chunk; + const char __user *ubuf = buf; bitmap_zero(maskp, nmaskbits); @@ -343,11 +346,15 @@ int bitmap_parse(const char __user *ubuf, unsigned int ubuflen, chunk = ndigits = 0; /* Get the next chunk of the bitmap */ - while (ubuflen) { + while (buflen) { old_c = c; - if (get_user(c, ubuf++)) - return -EFAULT; - ubuflen--; + if (is_user) { + if (__get_user(c, ubuf++)) + return -EFAULT; + } + else + c = *buf++; + buflen--; if (isspace(c)) continue; @@ -388,11 +395,36 @@ int bitmap_parse(const char __user *ubuf, unsigned int ubuflen, nbits += (nchunks == 1) ? nbits_to_hold_value(chunk) : CHUNKSZ; if (nbits > nmaskbits) return -EOVERFLOW; - } while (ubuflen && c == ','); + } while (buflen && c == ','); return 0; } -EXPORT_SYMBOL(bitmap_parse); +EXPORT_SYMBOL(__bitmap_parse); + +/** + * bitmap_parse_user() + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Wrapper for __bitmap_parse(), providing it with user buffer. + * + * We cannot have this as an inline function in bitmap.h because it needs + * linux/uaccess.h to get the access_ok() declaration and this causes + * cyclic dependencies. + */ +int bitmap_parse_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + if (!access_ok(VERIFY_READ, ubuf, ulen)) + return -EFAULT; + return __bitmap_parse((const char *)ubuf, ulen, 1, maskp, nmaskbits); +} +EXPORT_SYMBOL(bitmap_parse_user); /* * bscnl_emit(buf, buflen, rbot, rtop, bp) diff --git a/lib/carta_random32.c b/lib/carta_random32.c new file mode 100644 index 000000000000..ca82df70eee4 --- /dev/null +++ b/lib/carta_random32.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include <linux/types.h> +#include <linux/module.h> + +/* + * Fast, simple, yet decent quality random number generator based on + * a paper by David G. Carta ("Two Fast Implementations of the + * `Minimal Standard' Random Number Generator," Communications of the + * ACM, January, 1990). + */ +u64 carta_random32 (u64 seed) +{ +# define A 16807 +# define M ((u32) 1 << 31) + u64 s, prod = A * seed, p, q; + + p = (prod >> 31) & (M - 1); + q = (prod >> 0) & (M - 1); + s = p + q; + if (s >= M) + s -= M - 1; + return s; +} +EXPORT_SYMBOL_GPL(carta_random32); diff --git a/lib/irq_regs.c b/lib/irq_regs.c index 101b1a4f9b14..753880a5440c 100644 --- a/lib/irq_regs.c +++ b/lib/irq_regs.c @@ -8,8 +8,10 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/module.h> #include <asm/irq_regs.h> #ifndef ARCH_HAS_OWN_IRQ_REGS DEFINE_PER_CPU(struct pt_regs *, __irq_regs); +EXPORT_PER_CPU_SYMBOL(__irq_regs); #endif diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 637d55608de5..aa9bfd0bdbd1 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -160,13 +160,13 @@ static inline int tag_get(struct radix_tree_node *node, unsigned int tag, static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) { - root->gfp_mask |= (1 << (tag + __GFP_BITS_SHIFT)); + root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); } static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) { - root->gfp_mask &= ~(1 << (tag + __GFP_BITS_SHIFT)); + root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); } static inline void root_tag_clear_all(struct radix_tree_root *root) @@ -176,7 +176,7 @@ static inline void root_tag_clear_all(struct radix_tree_root *root) static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) { - return root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); + return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); } /* diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1d709ff528e1..2dbec90dc3ba 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -356,8 +356,8 @@ nomem: return -ENOMEM; } -void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) +void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -398,6 +398,24 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } } +void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + /* + * It is undesirable to test vma->vm_file as it should be non-null + * for valid hugetlb area. However, vm_file will be NULL in the error + * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails, + * do_mmap_pgoff() nullifies vma->vm_file before calling this function + * to clean up. Since no pte has actually been setup, it is safe to + * do nothing in this case. + */ + if (vma->vm_file) { + spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); + __unmap_hugepage_range(vma, start, end); + spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); + } +} + static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t pte) { diff --git a/mm/memory.c b/mm/memory.c index 9cf3f341a28a..b5a4aadd961a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1086,6 +1086,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, default: BUG(); } + cond_resched(); } if (pages) { pages[i] = page; @@ -2169,11 +2170,13 @@ retry: * after the next truncate_count read. */ - /* no page was available -- either SIGBUS or OOM */ - if (new_page == NOPAGE_SIGBUS) + /* no page was available -- either SIGBUS, OOM or REFAULT */ + if (unlikely(new_page == NOPAGE_SIGBUS)) return VM_FAULT_SIGBUS; - if (new_page == NOPAGE_OOM) + else if (unlikely(new_page == NOPAGE_OOM)) return VM_FAULT_OOM; + else if (unlikely(new_page == NOPAGE_REFAULT)) + return VM_FAULT_MINOR; /* * Should we do an early C-O-W break? diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 25788b1b7fcf..617fb31086ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -727,7 +727,7 @@ int do_migrate_pages(struct mm_struct *mm, return -ENOSYS; } -static struct page *new_vma_page(struct page *page, unsigned long private) +static struct page *new_vma_page(struct page *page, unsigned long private, int **x) { return NULL; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a8c003e7b3d5..40db96a655d0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -495,17 +495,16 @@ static void __free_pages_ok(struct page *page, unsigned int order) int i; int reserved = 0; - arch_free_page(page, order); - if (!PageHighMem(page)) - debug_check_no_locks_freed(page_address(page), - PAGE_SIZE<<order); - for (i = 0 ; i < (1 << order) ; ++i) reserved += free_pages_check(page + i); if (reserved) return; + if (!PageHighMem(page)) + debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order); + arch_free_page(page, order); kernel_map_pages(page, 1 << order, 0); + local_irq_save(flags); __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, order); @@ -781,13 +780,14 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) struct per_cpu_pages *pcp; unsigned long flags; - arch_free_page(page, 0); - if (PageAnon(page)) page->mapping = NULL; if (free_pages_check(page)) return; + if (!PageHighMem(page)) + debug_check_no_locks_freed(page_address(page), PAGE_SIZE); + arch_free_page(page, 0); kernel_map_pages(page, 1, 0); pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; @@ -2294,19 +2294,6 @@ unsigned long __init zone_absent_pages_in_node(int nid, return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); } -/* Return the zone index a PFN is in */ -int memmap_zone_idx(struct page *lmem_map) -{ - int i; - unsigned long phys_addr = virt_to_phys(lmem_map); - unsigned long pfn = phys_addr >> PAGE_SHIFT; - - for (i = 0; i < MAX_NR_ZONES; i++) - if (pfn < arch_zone_highest_possible_pfn[i]) - break; - - return i; -} #else static inline unsigned long zone_spanned_pages_in_node(int nid, unsigned long zone_type, @@ -2325,10 +2312,6 @@ static inline unsigned long zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } -static inline int memmap_zone_idx(struct page *lmem_map) -{ - return MAX_NR_ZONES; -} #endif static void __init calculate_node_totalpages(struct pglist_data *pgdat, diff --git a/mm/rmap.c b/mm/rmap.c index e2155d791d99..a9136d8b7577 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -576,15 +576,14 @@ void page_add_file_rmap(struct page *page) void page_remove_rmap(struct page *page) { if (atomic_add_negative(-1, &page->_mapcount)) { -#ifdef CONFIG_DEBUG_VM if (unlikely(page_mapcount(page) < 0)) { printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); printk (KERN_EMERG " page->flags = %lx\n", page->flags); printk (KERN_EMERG " page->count = %x\n", page_count(page)); printk (KERN_EMERG " page->mapping = %p\n", page->mapping); + BUG(); } -#endif - BUG_ON(page_mapcount(page) < 0); + /* * It would be tidy to reset the PageAnon mapping here, * but that might overwrite a racing page_add_anon_rmap diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c index c946bf468718..f5664c5b9eb1 100644 --- a/mm/shmem_acl.c +++ b/mm/shmem_acl.c @@ -35,7 +35,7 @@ shmem_get_acl(struct inode *inode, int type) } /** - * shmem_get_acl - generic_acl_operations->setacl() operation + * shmem_set_acl - generic_acl_operations->setacl() operation */ static void shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) diff --git a/mm/slab.c b/mm/slab.c index e9a63b5a7fb9..266449d604bd 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1106,15 +1106,18 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) int nodeid = slabp->nodeid; struct kmem_list3 *l3; struct array_cache *alien = NULL; + int node; + + node = numa_node_id(); /* * Make sure we are not freeing a object from another node to the array * cache on this cpu. */ - if (likely(slabp->nodeid == numa_node_id())) + if (likely(slabp->nodeid == node)) return 0; - l3 = cachep->nodelists[numa_node_id()]; + l3 = cachep->nodelists[node]; STATS_INC_NODEFREES(cachep); if (l3->alien && l3->alien[nodeid]) { alien = l3->alien[nodeid]; @@ -1325,7 +1328,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, { struct kmem_list3 *ptr; - BUG_ON(cachep->nodelists[nodeid] != list); ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); BUG_ON(!ptr); @@ -1352,6 +1354,7 @@ void __init kmem_cache_init(void) struct cache_names *names; int i; int order; + int node; for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); @@ -1386,12 +1389,14 @@ void __init kmem_cache_init(void) * 6) Resize the head arrays of the kmalloc caches to their final sizes. */ + node = numa_node_id(); + /* 1) create the cache_cache */ INIT_LIST_HEAD(&cache_chain); list_add(&cache_cache.next, &cache_chain); cache_cache.colour_off = cache_line_size(); cache_cache.array[smp_processor_id()] = &initarray_cache.cache; - cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; + cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE]; cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); @@ -1496,19 +1501,18 @@ void __init kmem_cache_init(void) } /* 5) Replace the bootstrap kmem_list3's */ { - int node; + int nid; + /* Replace the static kmem_list3 structures for the boot cpu */ - init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], - numa_node_id()); + init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); - for_each_online_node(node) { + for_each_online_node(nid) { init_list(malloc_sizes[INDEX_AC].cs_cachep, - &initkmem_list3[SIZE_AC + node], node); + &initkmem_list3[SIZE_AC + nid], nid); if (INDEX_AC != INDEX_L3) { init_list(malloc_sizes[INDEX_L3].cs_cachep, - &initkmem_list3[SIZE_L3 + node], - node); + &initkmem_list3[SIZE_L3 + nid], nid); } } } @@ -2918,6 +2922,9 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) int batchcount; struct kmem_list3 *l3; struct array_cache *ac; + int node; + + node = numa_node_id(); check_irq_off(); ac = cpu_cache_get(cachep); @@ -2931,7 +2938,7 @@ retry: */ batchcount = BATCHREFILL_LIMIT; } - l3 = cachep->nodelists[numa_node_id()]; + l3 = cachep->nodelists[node]; BUG_ON(ac->avail > 0 || !l3); spin_lock(&l3->list_lock); @@ -2961,7 +2968,7 @@ retry: STATS_SET_HIGH(cachep); ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, - numa_node_id()); + node); } check_slabp(cachep, slabp); @@ -2980,7 +2987,7 @@ alloc_done: if (unlikely(!ac->avail)) { int x; - x = cache_grow(cachep, flags, numa_node_id()); + x = cache_grow(cachep, flags, node); /* cache_grow can reenable interrupts, then ac could change. */ ac = cpu_cache_get(cachep); diff --git a/mm/truncate.c b/mm/truncate.c index f4edbc179d14..11ca480701dd 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -302,7 +302,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page->mapping != mapping) return 0; - if (PagePrivate(page) && !try_to_release_page(page, 0)) + if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; write_lock_irq(&mapping->tree_lock); @@ -396,6 +396,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pagevec_release(&pvec); cond_resched(); } + WARN_ON_ONCE(ret); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index da9cfe927158..60a508eb1945 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -62,7 +62,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb) default: printk(VLAN_DBG "%s: unable to resolve type %X addresses.\n", - dev->name, (int)veth->h_vlan_encapsulated_proto); + dev->name, ntohs(veth->h_vlan_encapsulated_proto)); memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); break; diff --git a/net/compat.c b/net/compat.c index d5d69fa15d07..52d32f1bc728 100644 --- a/net/compat.c +++ b/net/compat.c @@ -285,8 +285,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) if (i > 0) { int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); - if (!err) - err = put_user(SOL_SOCKET, &cm->cmsg_level); + err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) diff --git a/net/core/flow.c b/net/core/flow.c index f23e7e386543..b16d31ae5e54 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -85,6 +85,14 @@ static void flow_cache_new_hashrnd(unsigned long arg) add_timer(&flow_hash_rnd_timer); } +static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) +{ + if (fle->object) + atomic_dec(fle->object_ref); + kmem_cache_free(flow_cachep, fle); + flow_count(cpu)--; +} + static void __flow_cache_shrink(int cpu, int shrink_to) { struct flow_cache_entry *fle, **flp; @@ -100,10 +108,7 @@ static void __flow_cache_shrink(int cpu, int shrink_to) } while ((fle = *flp) != NULL) { *flp = fle->next; - if (fle->object) - atomic_dec(fle->object_ref); - kmem_cache_free(flow_cachep, fle); - flow_count(cpu)--; + flow_entry_kill(cpu, fle); } } } @@ -220,24 +225,33 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, nocache: { + int err; void *obj; atomic_t *obj_ref; - resolver(key, family, dir, &obj, &obj_ref); + err = resolver(key, family, dir, &obj, &obj_ref); if (fle) { - fle->genid = atomic_read(&flow_cache_genid); - - if (fle->object) - atomic_dec(fle->object_ref); - - fle->object = obj; - fle->object_ref = obj_ref; - if (obj) - atomic_inc(fle->object_ref); + if (err) { + /* Force security policy check on next lookup */ + *head = fle->next; + flow_entry_kill(cpu, fle); + } else { + fle->genid = atomic_read(&flow_cache_genid); + + if (fle->object) + atomic_dec(fle->object_ref); + + fle->object = obj; + fle->object_ref = obj_ref; + if (obj) + atomic_inc(fle->object_ref); + } } local_bh_enable(); + if (err) + obj = ERR_PTR(err); return obj; } } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 221e4038216b..02f3c7947898 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -602,7 +602,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) goto errout; } - err = rtnl_unicast(skb, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); errout: kfree(iw_buf); dev_put(dev); diff --git a/net/core/scm.c b/net/core/scm.c index 649d01ef35b6..271cf060ef8c 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -245,8 +245,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) if (i > 0) { int cmlen = CMSG_LEN(i*sizeof(int)); - if (!err) - err = put_user(SOL_SOCKET, &cm->cmsg_level); + err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bf692c1c116f..7e746c4c1688 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -311,7 +311,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); return; } @@ -614,7 +614,7 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put((struct inet_timewait_sock *)nsk); + inet_twsk_put(inet_twsk(nsk)); return NULL; } @@ -980,7 +980,7 @@ discard_and_relse: goto discard_it; do_time_wait: - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); goto no_dccp_socket; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 7a47399cf31f..7171a78671aa 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -285,7 +285,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); return; } @@ -663,7 +663,7 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put((struct inet_timewait_sock *)nsk); + inet_twsk_put(inet_twsk(nsk)); return NULL; } @@ -1109,7 +1109,7 @@ discard_and_relse: goto discard_it; do_time_wait: - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); goto no_dccp_socket; } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 70e027375682..3456cd331835 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1178,8 +1178,10 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len if (peer) { if ((sock->state != SS_CONNECTED && sock->state != SS_CONNECTING) && - scp->accept_mode == ACC_IMMED) + scp->accept_mode == ACC_IMMED) { + release_sock(sk); return -ENOTCONN; + } memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn)); } else { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index dd0761e3d280..a2a43d8d93fe 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -267,9 +267,14 @@ static void dn_dst_link_failure(struct sk_buff *skb) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return memcmp(&fl1->nl_u.dn_u, &fl2->nl_u.dn_u, sizeof(fl1->nl_u.dn_u)) == 0 && - fl1->oif == fl2->oif && - fl1->iif == fl2->iif; + return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | + (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | +#ifdef CONFIG_IP_ROUTE_FWMARK + (fl1->nl_u.dn_u.fwmark ^ fl2->nl_u.dn_u.fwmark) | +#endif + (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | + (fl1->oif ^ fl2->oif) | + (fl1->iif ^ fl2->iif)) == 0; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a8e2e879a647..bde8ccaa1531 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -43,6 +43,7 @@ #include <net/tcp.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> +#include <asm/atomic.h> #include <asm/bug.h> struct cipso_v4_domhsh_entry { @@ -79,7 +80,7 @@ struct cipso_v4_map_cache_entry { unsigned char *key; size_t key_len; - struct netlbl_lsm_cache lsm_data; + struct netlbl_lsm_cache *lsm_data; u32 activity; struct list_head list; @@ -188,13 +189,14 @@ static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) * @entry: the entry to free * * Description: - * This function frees the memory associated with a cache entry. + * This function frees the memory associated with a cache entry including the + * LSM cache data if there are no longer any users, i.e. reference count == 0. * */ static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) { - if (entry->lsm_data.free) - entry->lsm_data.free(entry->lsm_data.data); + if (entry->lsm_data) + netlbl_secattr_cache_free(entry->lsm_data); kfree(entry->key); kfree(entry); } @@ -315,8 +317,8 @@ static int cipso_v4_cache_check(const unsigned char *key, entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; - secattr->cache.free = entry->lsm_data.free; - secattr->cache.data = entry->lsm_data.data; + atomic_inc(&entry->lsm_data->refcount); + secattr->cache = entry->lsm_data; if (prev_entry == NULL) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -383,8 +385,8 @@ int cipso_v4_cache_add(const struct sk_buff *skb, memcpy(entry->key, cipso_ptr, cipso_ptr_len); entry->key_len = cipso_ptr_len; entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); - entry->lsm_data.free = secattr->cache.free; - entry->lsm_data.data = secattr->cache.data; + atomic_inc(&secattr->cache->refcount); + entry->lsm_data = secattr->cache; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f5fba051df3d..d5b5dec075b8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -611,8 +611,8 @@ static int ipgre_rcv(struct sk_buff *skb) * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (flags == 0 && - skb->protocol == __constant_htons(ETH_P_WCCP)) { - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol == htons(ETH_P_WCCP)) { + skb->protocol = htons(ETH_P_IP); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; } diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index e433cb0ff894..6d398f10aa91 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -274,7 +274,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { /* Passive mode on */ - IP_VS_DBG(7, "got PASV at %zd of %zd\n", + IP_VS_DBG(7, "got PASV at %td of %td\n", data - data_start, data_limit - data_start); cp->app_data = &ip_vs_ftp_pasv; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c41ddba02e9d..925ee4dfc32c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -566,9 +566,15 @@ static inline u32 rt_score(struct rtable *rt) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 && - fl1->oif == fl2->oif && - fl1->iif == fl2->iif; + return ((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | +#ifdef CONFIG_IP_ROUTE_FWMARK + (fl1->nl_u.ip4_u.fwmark ^ fl2->nl_u.ip4_u.fwmark) | +#endif + (*(u16 *)&fl1->nl_u.ip4_u.tos ^ + *(u16 *)&fl2->nl_u.ip4_u.tos) | + (fl1->oif ^ fl2->oif) | + (fl1->iif ^ fl2->iif)) == 0; } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c83938b8fcb1..6bbd98575172 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -355,7 +355,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } if (sk->sk_state == TCP_TIME_WAIT) { - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); return; } @@ -578,7 +578,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, struct tcphdr *th = skb->h.th; struct { struct tcphdr th; - u32 tsopt[3]; + u32 tsopt[TCPOLEN_TSTAMP_ALIGNED >> 2]; } rep; struct ip_reply_arg arg; @@ -960,7 +960,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put((struct inet_timewait_sock *)nsk); + inet_twsk_put(inet_twsk(nsk)); return NULL; } @@ -1154,26 +1154,24 @@ discard_and_relse: do_time_wait: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - inet_twsk_put((struct inet_timewait_sock *) sk); + inet_twsk_put(inet_twsk(sk)); goto discard_it; } if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - inet_twsk_put((struct inet_timewait_sock *) sk); + inet_twsk_put(inet_twsk(sk)); goto discard_it; } - switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, - skb, th)) { + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, th->dest, inet_iif(skb)); if (sk2) { - inet_twsk_deschedule((struct inet_timewait_sock *)sk, - &tcp_death_row); - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_put(inet_twsk(sk)); sk = sk2; goto process; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9a253faefc81..f22536e32cb1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -273,10 +273,10 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, __u32 tstamp) { if (tp->rx_opt.tstamp_ok) { - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); *ptr++ = htonl(tstamp); *ptr++ = htonl(tp->rx_opt.ts_recent); } @@ -325,18 +325,27 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); if (ts) { if(sack) - *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); else - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); *ptr++ = htonl(tstamp); /* TSVAL */ *ptr++ = htonl(ts_recent); /* TSECR */ } else if(sack) - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK_PERM << 8) | + TCPOLEN_SACK_PERM); if (offer_wscale) - *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + (wscale)); } /* This routine actually transmits TCP packets queued in by diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 7a7a00147e55..1bed0cdf53e3 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -52,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { + xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index a460e8132b4d..ef5eaad44851 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -153,6 +153,19 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION ---help--- Support for MIPv6 route optimization mode. +config IPV6_SIT + tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" + depends on IPV6 + default y + ---help--- + Tunneling means encapsulating data of one protocol type within + another protocol and sending it over a channel that understands the + encapsulating protocol. This driver implements encapsulation of IPv6 + into IPv4 packets. This is useful if you want to connect two IPv6 + networks over an IPv4-only path. + + Saying M here will produce a module called sit.ko. If unsure, say Y. + config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" select INET6_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 87274e47fe32..addcc011bc01 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_IPV6) += ipv6.o -ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ +ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ @@ -29,6 +29,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-y += exthdrs_core.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e03c33b2465b..b312a5f7a759 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -396,8 +396,10 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ndev->regen_timer.data = (unsigned long) ndev; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || - dev->type == ARPHRD_NONE || - dev->type == ARPHRD_SIT) { +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) + dev->type == ARPHRD_SIT || +#endif + dev->type == ARPHRD_NONE) { printk(KERN_INFO "%s: Disabled Privacy Extensions\n", dev->name); @@ -1546,8 +1548,10 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, This thing is done here expecting that the whole class of non-broadcast devices need not cloning. */ +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) cfg.fc_flags |= RTF_NONEXTHOP; +#endif ip6_route_add(&cfg); } @@ -1569,6 +1573,7 @@ static void addrconf_add_mroute(struct net_device *dev) ip6_route_add(&cfg); } +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) static void sit_route_add(struct net_device *dev) { struct fib6_config cfg = { @@ -1582,6 +1587,7 @@ static void sit_route_add(struct net_device *dev) /* prefix length - 96 bits "::d.d.d.d" */ ip6_route_add(&cfg); } +#endif static void addrconf_add_lroute(struct net_device *dev) { @@ -1852,6 +1858,7 @@ int addrconf_set_dstaddr(void __user *arg) if (dev == NULL) goto err_exit; +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT) { struct ifreq ifr; mm_segment_t oldfs; @@ -1881,6 +1888,7 @@ int addrconf_set_dstaddr(void __user *arg) err = dev_open(dev); } } +#endif err_exit: rtnl_unlock(); @@ -2010,6 +2018,7 @@ int addrconf_del_ifaddr(void __user *arg) return err; } +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) static void sit_add_v4_addrs(struct inet6_dev *idev) { struct inet6_ifaddr * ifp; @@ -2078,6 +2087,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } } } +#endif static void init_loopback(struct net_device *dev) { @@ -2141,6 +2151,7 @@ static void addrconf_dev_config(struct net_device *dev) addrconf_add_linklocal(idev, &addr); } +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) static void addrconf_sit_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2166,6 +2177,7 @@ static void addrconf_sit_config(struct net_device *dev) } else sit_route_add(dev); } +#endif static inline int ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) @@ -2260,9 +2272,11 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } switch(dev->type) { +#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) case ARPHRD_SIT: addrconf_sit_config(dev); break; +#endif case ARPHRD_TUNNEL6: addrconf_ip6_tnl_config(dev); break; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e94eccb99707..858cae29581c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -850,7 +850,6 @@ static int __init inet6_init(void) err = addrconf_init(); if (err) goto addrconf_fail; - sit_init(); /* Init v6 extension headers. */ ipv6_rthdr_init(); @@ -927,7 +926,6 @@ static void __exit inet6_exit(void) mip6_fini(); #endif /* Cleanup code parts. */ - sit_cleanup(); ip6_flowlabel_cleanup(); addrconf_cleanup(); ip6_route_cleanup(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 836eecd7e62b..dc5765b62b87 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -850,3 +850,6 @@ int __init sit_init(void) inet_del_protocol(&sit_protocol, IPPROTO_IPV6); goto out; } + +module_init(sit_init); +module_exit(sit_cleanup); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3b6575478fcc..4c2a7c0cafef 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -329,7 +329,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); return; } @@ -653,7 +653,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 int tot_len = sizeof(struct tcphdr); if (ts) - tot_len += 3*4; + tot_len += TCPOLEN_TSTAMP_ALIGNED; buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); @@ -749,7 +749,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put((struct inet_timewait_sock *)nsk); + inet_twsk_put(inet_twsk(nsk)); return NULL; } @@ -1283,18 +1283,17 @@ discard_and_relse: do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); goto discard_it; } if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); goto discard_it; } - switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, - skb, th)) { + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { struct sock *sk2; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6a252e2134d1..73cee2ec07e8 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,7 +73,7 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(xdst, fl, AF_INET6, + xfrm_bundle_ok(policy, xdst, fl, AF_INET6, (xdst->u.rt6.rt6i_dst.plen != 128 || xdst->u.rt6.rt6i_src.plen != 128))) { dst_clone(dst); diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index a154b1d71c0f..56292ab7d652 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -43,7 +43,7 @@ struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}}; * * Faster, check boundary... Jean II */ -static char *strndup(char *str, int max) +static char *strndup(char *str, size_t max) { char *new_str; int len; diff --git a/net/key/af_key.c b/net/key/af_key.c index ff98e70b0931..20ff7cca1d07 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2928,11 +2928,6 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, if (*dir) goto out; } - else { - *dir = security_xfrm_sock_policy_alloc(xp, sk); - if (*dir) - goto out; - } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 54fb7de3c2b1..ff971103fd0c 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -200,7 +200,7 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { - if (secattr->cache.data == NULL) + if (secattr->cache == NULL) return -ENOMSG; if (CIPSO_V4_OPTEXIST(skb)) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index bb3ddd4784b1..9b9c555c713f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -786,11 +786,10 @@ static long htb_do_events(struct htb_sched *q, int level) for (i = 0; i < 500; i++) { struct htb_class *cl; long diff; - struct rb_node *p = q->wait_pq[level].rb_node; + struct rb_node *p = rb_first(&q->wait_pq[level]); + if (!p) return 0; - while (p->rb_left) - p = p->rb_left; cl = rb_entry(p, struct htb_class, pq_node); if (time_after(cl->pq_key, q->jiffies)) { diff --git a/net/sctp/proc.c b/net/sctp/proc.c index a356d8d310a9..7f49e769080e 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -344,7 +344,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, hash, assoc->assoc_id, assoc->sndbuf_used, - (sk->sk_rcvbuf - assoc->rwnd), + atomic_read(&assoc->rmem_alloc), sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3fe906d65069..9deec4391187 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5362,6 +5362,20 @@ static void sctp_wfree(struct sk_buff *skb) sctp_association_put(asoc); } +/* Do accounting for the receive space on the socket. + * Accounting for the association is done in ulpevent.c + * We set this as a destructor for the cloned data skbs so that + * accounting is done at the correct time. + */ +void sctp_sock_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct sctp_ulpevent *event = sctp_skb2event(skb); + + atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); +} + + /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len) @@ -5634,10 +5648,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sock_rfree(skb); + sctp_sock_rfree(skb); __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); - skb_set_owner_r(skb, newsk); + sctp_skb_set_owner_r(skb, newsk); } } @@ -5665,10 +5679,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sock_rfree(skb); + sctp_sock_rfree(skb); __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); - skb_set_owner_r(skb, newsk); + sctp_skb_set_owner_r(skb, newsk); } } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ee236784a6bb..a015283a9087 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -55,10 +55,13 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ -SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags) +SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, + int msg_flags, + unsigned int len) { memset(event, 0, sizeof(struct sctp_ulpevent)); event->msg_flags = msg_flags; + event->rmem_len = len; } /* Create a new sctp_ulpevent. */ @@ -73,7 +76,7 @@ SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, goto fail; event = sctp_skb2event(skb); - sctp_ulpevent_init(event, msg_flags); + sctp_ulpevent_init(event, msg_flags, skb->truesize); return event; @@ -101,17 +104,16 @@ static inline void sctp_ulpevent_set_owner(struct sctp_ulpevent *event, sctp_association_hold((struct sctp_association *)asoc); skb = sctp_event2skb(event); event->asoc = (struct sctp_association *)asoc; - atomic_add(skb->truesize, &event->asoc->rmem_alloc); - skb_set_owner_r(skb, asoc->base.sk); + atomic_add(event->rmem_len, &event->asoc->rmem_alloc); + sctp_skb_set_owner_r(skb, asoc->base.sk); } /* A simple destructor to give up the reference to the association. */ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event) { struct sctp_association *asoc = event->asoc; - struct sk_buff *skb = sctp_event2skb(event); - atomic_sub(skb->truesize, &asoc->rmem_alloc); + atomic_sub(event->rmem_len, &asoc->rmem_alloc); sctp_association_put(asoc); } @@ -372,7 +374,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - sctp_ulpevent_init(event, MSG_NOTIFICATION); + sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); sre = (struct sctp_remote_error *) skb_push(skb, sizeof(struct sctp_remote_error)); @@ -464,7 +466,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - sctp_ulpevent_init(event, MSG_NOTIFICATION); + sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); ssf = (struct sctp_send_failed *) skb_push(skb, sizeof(struct sctp_send_failed)); @@ -682,8 +684,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - /* Initialize event with flags 0. */ - sctp_ulpevent_init(event, 0); + /* Initialize event with flags 0 and correct length + * Since this is a clone of the original skb, only account for + * the data of this chunk as other chunks will be accounted separately. + */ + sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff)); sctp_ulpevent_receive_data(event, asoc); diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 575e556aeb3e..e1d144275f97 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -309,7 +309,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu if (!new) return NULL; /* try again later */ - new->sk = f_frag->sk; + sctp_skb_set_owner_r(new, f_frag->sk); skb_shinfo(new)->frag_list = pos; } else diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 447d9aef4605..1f0f079ffa65 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1146,10 +1146,11 @@ out: return ret; } -u32 * +static __be32 * svcauth_gss_prepare_to_wrap(struct xdr_buf *resbuf, struct gss_svc_data *gsd) { - u32 *p, verf_len; + __be32 *p; + u32 verf_len; p = gsd->verf_start; gsd->verf_start = NULL; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c2c8bb20d07f..2807fa0eab40 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -282,7 +282,10 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, serv->sv_program = prog; serv->sv_nrthreads = 1; serv->sv_stats = prog->pg_stats; - serv->sv_bufsz = bufsize? bufsize : 4096; + if (bufsize > RPCSVC_MAXPAYLOAD) + bufsize = RPCSVC_MAXPAYLOAD; + serv->sv_max_payload = bufsize? bufsize : 4096; + serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); serv->sv_shutdown = shutdown; xdrsize = 0; while (prog) { @@ -414,9 +417,9 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) int pages; int arghi; - if (size > RPCSVC_MAXPAYLOAD) - size = RPCSVC_MAXPAYLOAD; - pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE; + pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. + * We assume one is at most one page + */ arghi = 0; BUG_ON(pages > RPCSVC_MAXPAGES); while (pages) { @@ -463,7 +466,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) - || !svc_init_buffer(rqstp, serv->sv_bufsz)) + || !svc_init_buffer(rqstp, serv->sv_max_mesg)) goto out_thread; serv->sv_nrthreads++; @@ -938,8 +941,8 @@ u32 svc_max_payload(const struct svc_rqst *rqstp) if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM) max = RPCSVC_MAXPAYLOAD_UDP; - if (rqstp->rq_server->sv_bufsz < max) - max = rqstp->rq_server->sv_bufsz; + if (rqstp->rq_server->sv_max_payload < max) + max = rqstp->rq_server->sv_max_payload; return max; } EXPORT_SYMBOL_GPL(svc_max_payload); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b39e7e2b648f..61e307cca13d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -192,13 +192,13 @@ svc_sock_enqueue(struct svc_sock *svsk) svsk->sk_pool = pool; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 + if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2 > svc_sock_wspace(svsk)) && !test_bit(SK_CLOSE, &svsk->sk_flags) && !test_bit(SK_CONN, &svsk->sk_flags)) { /* Don't enqueue while not enough space for reply */ dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", - svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, + svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg, svc_sock_wspace(svsk)); svsk->sk_pool = NULL; clear_bit(SK_BUSY, &svsk->sk_flags); @@ -220,7 +220,7 @@ svc_sock_enqueue(struct svc_sock *svsk) rqstp, rqstp->rq_sock); rqstp->rq_sock = svsk; atomic_inc(&svsk->sk_inuse); - rqstp->rq_reserved = serv->sv_bufsz; + rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); BUG_ON(svsk->sk_pool != pool); wake_up(&rqstp->rq_wait); @@ -639,8 +639,8 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) * which will access the socket. */ svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_bufsz, - (serv->sv_nrthreads+3) * serv->sv_bufsz); + (serv->sv_nrthreads+3) * serv->sv_max_mesg, + (serv->sv_nrthreads+3) * serv->sv_max_mesg); if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { svc_sock_received(svsk); @@ -749,8 +749,8 @@ svc_udp_init(struct svc_sock *svsk) * svc_udp_recvfrom will re-adjust if necessary */ svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_server->sv_bufsz, - 3 * svsk->sk_server->sv_bufsz); + 3 * svsk->sk_server->sv_max_mesg, + 3 * svsk->sk_server->sv_max_mesg); set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ set_bit(SK_CHNGBUF, &svsk->sk_flags); @@ -993,8 +993,8 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) * as soon a a complete request arrives. */ svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_bufsz, - 3 * serv->sv_bufsz); + (serv->sv_nrthreads+3) * serv->sv_max_mesg, + 3 * serv->sv_max_mesg); clear_bit(SK_DATA, &svsk->sk_flags); @@ -1032,7 +1032,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) } svsk->sk_reclen &= 0x7fffffff; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); - if (svsk->sk_reclen > serv->sv_bufsz) { + if (svsk->sk_reclen > serv->sv_max_mesg) { printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n", (unsigned long) svsk->sk_reclen); goto err_delete; @@ -1171,8 +1171,8 @@ svc_tcp_init(struct svc_sock *svsk) * svc_tcp_recvfrom will re-adjust if necessary */ svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_server->sv_bufsz, - 3 * svsk->sk_server->sv_bufsz); + 3 * svsk->sk_server->sv_max_mesg, + 3 * svsk->sk_server->sv_max_mesg); set_bit(SK_CHNGBUF, &svsk->sk_flags); set_bit(SK_DATA, &svsk->sk_flags); @@ -1234,7 +1234,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) /* now allocate needed pages. If we get a failure, sleep briefly */ - pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE; + pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; for (i=0; i < pages ; i++) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); @@ -1263,7 +1263,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) if ((svsk = svc_sock_dequeue(pool)) != NULL) { rqstp->rq_sock = svsk; atomic_inc(&svsk->sk_inuse); - rqstp->rq_reserved = serv->sv_bufsz; + rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); } else { /* No data pending. Go to sleep */ diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 32d778448a00..acfb852e7c98 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -941,7 +941,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, int sz_to_copy; int sz_copied = 0; int needed; - char *crs = m->msg_iov->iov_base; + char __user *crs = m->msg_iov->iov_base; unsigned char *buf_crs; u32 err; int res; @@ -1496,7 +1496,7 @@ static int setsockopt(struct socket *sock, return -ENOPROTOOPT; if (ol < sizeof(value)) return -EINVAL; - if ((res = get_user(value, (u32 *)ov))) + if ((res = get_user(value, (u32 __user *)ov))) return res; if (down_interruptible(&tsock->sem)) @@ -1541,7 +1541,7 @@ static int setsockopt(struct socket *sock, */ static int getsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, int *ol) + int lvl, int opt, char __user *ov, int __user *ol) { struct tipc_sock *tsock = tipc_sk(sock->sk); int len; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2a7861661f14..7736b23c3f03 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -883,30 +883,32 @@ out: } EXPORT_SYMBOL(xfrm_policy_walk); -/* Find policy to apply to this flow. */ - +/* + * Find policy to apply to this flow. + * + * Returns 0 if policy found, else an -errno. + */ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, u8 type, u16 family, int dir) { struct xfrm_selector *sel = &pol->selector; - int match; + int match, ret = -ESRCH; if (pol->family != family || pol->type != type) - return 0; + return ret; match = xfrm_selector_match(sel, fl, family); - if (match) { - if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) - return 1; - } + if (match) + ret = security_xfrm_policy_lookup(pol, fl->secid, dir); - return 0; + return ret; } static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, u16 family, u8 dir) { + int err; struct xfrm_policy *pol, *ret; xfrm_address_t *daddr, *saddr; struct hlist_node *entry; @@ -922,7 +924,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, chain = policy_hash_direct(daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir)) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; + } + } else { ret = pol; priority = ret->priority; break; @@ -930,36 +940,53 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, } chain = &xfrm_policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir) && - pol->priority < priority) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; + } + } else if (pol->priority < priority) { ret = pol; break; } } if (ret) xfrm_pol_hold(ret); +fail: read_unlock_bh(&xfrm_policy_lock); return ret; } -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, +static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; + int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (pol) + if (IS_ERR(pol)) { + err = PTR_ERR(pol); + pol = NULL; + } + if (pol || err) goto end; #endif pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); - + if (IS_ERR(pol)) { + err = PTR_ERR(pol); + pol = NULL; + } #ifdef CONFIG_XFRM_SUB_POLICY end: #endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; + return err; } static inline int policy_to_flow_dir(int dir) @@ -989,12 +1016,16 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc sk->sk_family); int err = 0; - if (match) - err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); - - if (match && !err) - xfrm_pol_hold(pol); - else + if (match) { + err = security_xfrm_policy_lookup(pol, fl->secid, + policy_to_flow_dir(dir)); + if (!err) + xfrm_pol_hold(pol); + else if (err == -ESRCH) + pol = NULL; + else + pol = ERR_PTR(err); + } else pol = NULL; } read_unlock_bh(&xfrm_policy_lock); @@ -1286,8 +1317,11 @@ restart: pol_dead = 0; xfrm_nr = 0; - if (sk && sk->sk_policy[1]) + if (sk && sk->sk_policy[1]) { policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + if (IS_ERR(policy)) + return PTR_ERR(policy); + } if (!policy) { /* To accelerate a bit... */ @@ -1297,6 +1331,8 @@ restart: policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); + if (IS_ERR(policy)) + return PTR_ERR(policy); } if (!policy) @@ -1343,6 +1379,10 @@ restart: fl, family, XFRM_POLICY_OUT); if (pols[1]) { + if (IS_ERR(pols[1])) { + err = PTR_ERR(pols[1]); + goto error; + } if (pols[1]->action == XFRM_POLICY_BLOCK) { err = -EPERM; goto error; @@ -1574,13 +1614,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } pol = NULL; - if (sk && sk->sk_policy[dir]) + if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); + if (IS_ERR(pol)) + return 0; + } if (!pol) pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); + if (IS_ERR(pol)) + return 0; + if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -1599,6 +1645,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, &fl, family, XFRM_POLICY_IN); if (pols[1]) { + if (IS_ERR(pols[1])) + return 0; pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; npols ++; } @@ -1706,7 +1754,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); + return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1828,7 +1876,8 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. */ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) +int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, + struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1845,7 +1894,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int str if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; - if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm, pol)) return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d54b3a70d5df..2b2e59d8ffbc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1992,15 +1992,6 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); - if (!xp->security) { - int err = security_xfrm_sock_policy_alloc(xp, sk); - if (err) { - kfree(xp); - *dir = err; - return NULL; - } - } - *dir = p->dir; return xp; diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h index 8dea47f9d3e4..fd695e1070f7 100644 --- a/scripts/kconfig/lxdialog/dialog.h +++ b/scripts/kconfig/lxdialog/dialog.h @@ -24,6 +24,7 @@ #include <ctype.h> #include <stdlib.h> #include <string.h> +#include <stdbool.h> #ifdef __sun__ #define CURS_MACROS diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 00d1ad19b2cc..187f5de4612c 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1262,7 +1262,9 @@ sub output_intro_text(%) { } ## -# generic output function for typedefs +# generic output function for all types (function, struct/union, typedef, enum); +# calls the generated, variable output_ function name based on +# functype and output_mode sub output_declaration { no strict 'refs'; my $name = shift; @@ -1278,8 +1280,7 @@ sub output_declaration { } ## -# generic output function - calls the right one based -# on current output mode. +# generic output function - calls the right one based on current output mode. sub output_intro { no strict 'refs'; my $func = "output_intro_".$output_mode; @@ -1518,6 +1519,9 @@ sub dump_function($$) { $prototype =~ s/^asmlinkage +//; $prototype =~ s/^inline +//; $prototype =~ s/^__inline__ +//; + $prototype =~ s/^__inline +//; + $prototype =~ s/^__always_inline +//; + $prototype =~ s/^noinline +//; $prototype =~ s/__devinit +//; $prototype =~ s/^#define +//; #ak added $prototype =~ s/__attribute__ \(\([a-z,]*\)\)//; @@ -1778,8 +1782,9 @@ sub process_file($) { $in_doc_sect = 1; $contents = $newcontents; if ($contents ne "") { - if (substr($contents, 0, 1) eq " ") { - $contents = substr($contents, 1); + while ((substr($contents, 0, 1) eq " ") || + substr($contents, 0, 1) eq "\t") { + $contents = substr($contents, 1); } $contents .= "\n"; } diff --git a/security/dummy.c b/security/dummy.c index aeee70565509..43874c1e6e23 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -881,7 +881,8 @@ static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x, return 1; } -static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp) { return 1; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 81eb59890162..526b28019aca 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -19,7 +19,8 @@ int selinux_xfrm_state_delete(struct xfrm_state *x); int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp); /* diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index b18895302555..ba48961f9d05 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -618,6 +618,7 @@ void policydb_destroy(struct policydb *p) c = c->next; ocontext_destroy(ctmp,i); } + p->ocontexts[i] = NULL; } g = p->genfs; @@ -633,6 +634,7 @@ void policydb_destroy(struct policydb *p) g = g->next; kfree(gtmp); } + p->genfs = NULL; cond_policydb_destroy(p); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 0c219a1b3243..18274b005090 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2172,7 +2172,12 @@ struct netlbl_cache { */ static void selinux_netlbl_cache_free(const void *data) { - struct netlbl_cache *cache = NETLBL_CACHE(data); + struct netlbl_cache *cache; + + if (data == NULL) + return; + + cache = NETLBL_CACHE(data); switch (cache->type) { case NETLBL_CACHE_T_MLS: ebitmap_destroy(&cache->data.mls_label.level[0].cat); @@ -2197,17 +2202,20 @@ static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) struct netlbl_lsm_secattr secattr; netlbl_secattr_init(&secattr); + secattr.cache = netlbl_secattr_cache_alloc(GFP_ATOMIC); + if (secattr.cache == NULL) + goto netlbl_cache_add_return; cache = kzalloc(sizeof(*cache), GFP_ATOMIC); if (cache == NULL) - goto netlbl_cache_add_failure; - secattr.cache.free = selinux_netlbl_cache_free; - secattr.cache.data = (void *)cache; + goto netlbl_cache_add_return; + secattr.cache->free = selinux_netlbl_cache_free; + secattr.cache->data = (void *)cache; cache->type = NETLBL_CACHE_T_MLS; if (ebitmap_cpy(&cache->data.mls_label.level[0].cat, &ctx->range.level[0].cat) != 0) - goto netlbl_cache_add_failure; + goto netlbl_cache_add_return; cache->data.mls_label.level[1].cat.highbit = cache->data.mls_label.level[0].cat.highbit; cache->data.mls_label.level[1].cat.node = @@ -2215,13 +2223,10 @@ static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) cache->data.mls_label.level[0].sens = ctx->range.level[0].sens; cache->data.mls_label.level[1].sens = ctx->range.level[0].sens; - if (netlbl_cache_add(skb, &secattr) != 0) - goto netlbl_cache_add_failure; + netlbl_cache_add(skb, &secattr); - return; - -netlbl_cache_add_failure: - netlbl_secattr_destroy(&secattr, 1); +netlbl_cache_add_return: + netlbl_secattr_destroy(&secattr); } /** @@ -2263,8 +2268,8 @@ static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, POLICY_RDLOCK; - if (secattr->cache.data) { - cache = NETLBL_CACHE(secattr->cache.data); + if (secattr->cache) { + cache = NETLBL_CACHE(secattr->cache->data); switch (cache->type) { case NETLBL_CACHE_T_SID: *sid = cache->data.sid; @@ -2331,7 +2336,7 @@ static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, selinux_netlbl_cache_add(skb, &ctx_new); ebitmap_destroy(&ctx_new.range.level[0].cat); } else { - *sid = SECINITSID_UNLABELED; + *sid = SECSID_NULL; rc = 0; } @@ -2369,7 +2374,7 @@ static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, &secattr, base_sid, sid); - netlbl_secattr_destroy(&secattr, 0); + netlbl_secattr_destroy(&secattr); return rc; } @@ -2415,7 +2420,7 @@ static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid) if (rc == 0) sksec->nlbl_state = NLBL_LABELED; - netlbl_secattr_destroy(&secattr, 0); + netlbl_secattr_destroy(&secattr); netlbl_socket_setsid_return: POLICY_RDUNLOCK; @@ -2514,10 +2519,10 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) if (netlbl_sock_getattr(sk, &secattr) == 0 && selinux_netlbl_secattr_to_sid(NULL, &secattr, - sksec->sid, + SECINITSID_UNLABELED, &nlbl_peer_sid) == 0) sksec->peer_sid = nlbl_peer_sid; - netlbl_secattr_destroy(&secattr, 0); + netlbl_secattr_destroy(&secattr); sksec->nlbl_state = NLBL_REQUIRE; @@ -2547,9 +2552,6 @@ u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid) if (rc != 0) return SECSID_NULL; - if (peer_sid == SECINITSID_UNLABELED) - return SECSID_NULL; - return peer_sid; } @@ -2611,11 +2613,13 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, u32 netlbl_sid; u32 recv_perm; - rc = selinux_netlbl_skbuff_getsid(skb, SECINITSID_NETMSG, &netlbl_sid); + rc = selinux_netlbl_skbuff_getsid(skb, + SECINITSID_UNLABELED, + &netlbl_sid); if (rc != 0) return rc; - if (netlbl_sid == SECINITSID_UNLABELED) + if (netlbl_sid == SECSID_NULL) return 0; switch (sksec->sclass) { @@ -2653,10 +2657,6 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) { struct sk_security_struct *sksec = sock->sk->sk_security; - - if (sksec->peer_sid == SECINITSID_UNLABELED) - return SECSID_NULL; - return sksec->peer_sid; } @@ -2672,16 +2672,10 @@ u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) { int peer_sid; - struct sock *sk = skb->sk; - struct inode_security_struct *isec; - if (sk == NULL || sk->sk_socket == NULL) - return SECSID_NULL; - - isec = SOCK_INODE(sk->sk_socket)->i_security; - if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0) - return SECSID_NULL; - if (peer_sid == SECINITSID_UNLABELED) + if (selinux_netlbl_skbuff_getsid(skb, + SECINITSID_UNLABELED, + &peer_sid) != 0) return SECSID_NULL; return peer_sid; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 3e742b850af6..675b995a67c3 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -77,8 +77,8 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) */ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { - int rc = 0; - u32 sel_sid = SECINITSID_UNLABELED; + int rc; + u32 sel_sid; struct xfrm_sec_ctx *ctx; /* Context sid is either set to label or ANY_ASSOC */ @@ -88,11 +88,21 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) sel_sid = ctx->ctx_sid; } + else + /* + * All flows should be treated as polmatch'ing an + * otherwise applicable "non-labeled" policy. This + * would prevent inadvertent "leaks". + */ + return 0; rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL); + if (rc == -EACCES) + rc = -ESRCH; + return rc; } @@ -108,15 +118,20 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * u32 pol_sid; int err; - if (x->security) - state_sid = x->security->ctx_sid; - else - state_sid = SECINITSID_UNLABELED; - - if (xp->security) + if (xp->security) { + if (!x->security) + /* unlabeled SA and labeled policy can't match */ + return 0; + else + state_sid = x->security->ctx_sid; pol_sid = xp->security->ctx_sid; - else - pol_sid = SECINITSID_UNLABELED; + } else + if (x->security) + /* unlabeled policy and labeled SA can't match */ + return 0; + else + /* unlabeled policy and unlabeled SA match all flows */ + return 1; err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, @@ -125,7 +140,11 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * if (err) return 0; - return selinux_xfrm_flow_state_match(fl, x); + err = avc_has_perm(fl->secid, state_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__SENDTO, + NULL)? 0:1; + + return err; } /* @@ -133,12 +152,22 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * * can use a given security association. */ -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp) { int rc = 0; u32 sel_sid = SECINITSID_UNLABELED; struct xfrm_sec_ctx *ctx; + if (!xp->security) + if (!xfrm->security) + return 1; + else + return 0; + else + if (!xfrm->security) + return 0; + /* Context sid is either set to label or ANY_ASSOC */ if ((ctx = xfrm->security)) { if (!selinux_authorizable_ctx(ctx)) diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 9aa9d94891f0..46b47689362c 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -158,6 +158,7 @@ static int snd_hwdep_release(struct inode *inode, struct file * file) { int err = -ENXIO; struct snd_hwdep *hw = file->private_data; + struct module *mod = hw->card->module; mutex_lock(&hw->open_mutex); if (hw->ops.release) { err = hw->ops.release(hw, file); @@ -167,7 +168,7 @@ static int snd_hwdep_release(struct inode *inode, struct file * file) hw->used--; snd_card_file_remove(hw->card, file); mutex_unlock(&hw->open_mutex); - module_put(hw->card->module); + module_put(mod); return err; } diff --git a/sound/core/hwdep_compat.c b/sound/core/hwdep_compat.c index 938f77580966..3827c0ceec8f 100644 --- a/sound/core/hwdep_compat.c +++ b/sound/core/hwdep_compat.c @@ -33,7 +33,7 @@ struct snd_hwdep_dsp_image32 { static int snd_hwdep_dsp_load_compat(struct snd_hwdep *hw, struct snd_hwdep_dsp_image32 __user *src) { - struct snd_hwdep_dsp_image *dst; + struct snd_hwdep_dsp_image __user *dst; compat_caddr_t ptr; u32 val; diff --git a/sound/core/init.c b/sound/core/init.c index d7607a25acdf..3058d626a90a 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -33,10 +33,10 @@ #include <sound/control.h> #include <sound/info.h> -struct snd_shutdown_f_ops { - struct file_operations f_ops; - struct snd_shutdown_f_ops *next; -}; +static DEFINE_SPINLOCK(shutdown_lock); +static LIST_HEAD(shutdown_files); + +static struct file_operations snd_shutdown_f_ops; static unsigned int snd_cards_lock; /* locked for registering/using */ struct snd_card *snd_cards[SNDRV_CARDS]; @@ -198,6 +198,25 @@ static ssize_t snd_disconnect_write(struct file *file, const char __user *buf, return -ENODEV; } +static int snd_disconnect_release(struct inode *inode, struct file *file) +{ + struct snd_monitor_file *df = NULL, *_df; + + spin_lock(&shutdown_lock); + list_for_each_entry(_df, &shutdown_files, shutdown_list) { + if (_df->file == file) { + df = _df; + break; + } + } + spin_unlock(&shutdown_lock); + + if (likely(df)) + return df->disconnected_f_op->release(inode, file); + + panic("%s(%p, %p) failed!", __FUNCTION__, inode, file); +} + static unsigned int snd_disconnect_poll(struct file * file, poll_table * wait) { return POLLERR | POLLNVAL; @@ -219,6 +238,22 @@ static int snd_disconnect_fasync(int fd, struct file *file, int on) return -ENODEV; } +static struct file_operations snd_shutdown_f_ops = +{ + .owner = THIS_MODULE, + .llseek = snd_disconnect_llseek, + .read = snd_disconnect_read, + .write = snd_disconnect_write, + .release = snd_disconnect_release, + .poll = snd_disconnect_poll, + .unlocked_ioctl = snd_disconnect_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = snd_disconnect_ioctl, +#endif + .mmap = snd_disconnect_mmap, + .fasync = snd_disconnect_fasync +}; + /** * snd_card_disconnect - disconnect all APIs from the file-operations (user space) * @card: soundcard structure @@ -234,9 +269,6 @@ int snd_card_disconnect(struct snd_card *card) { struct snd_monitor_file *mfile; struct file *file; - struct snd_shutdown_f_ops *s_f_ops; - struct file_operations *f_ops; - const struct file_operations *old_f_ops; int err; spin_lock(&card->files_lock); @@ -261,34 +293,14 @@ int snd_card_disconnect(struct snd_card *card) /* it's critical part, use endless loop */ /* we have no room to fail */ - s_f_ops = kmalloc(sizeof(struct snd_shutdown_f_ops), GFP_ATOMIC); - if (s_f_ops == NULL) - panic("Atomic allocation failed for snd_shutdown_f_ops!"); - - f_ops = &s_f_ops->f_ops; - - memset(f_ops, 0, sizeof(*f_ops)); - f_ops->owner = file->f_op->owner; - f_ops->release = file->f_op->release; - f_ops->llseek = snd_disconnect_llseek; - f_ops->read = snd_disconnect_read; - f_ops->write = snd_disconnect_write; - f_ops->poll = snd_disconnect_poll; - f_ops->unlocked_ioctl = snd_disconnect_ioctl; -#ifdef CONFIG_COMPAT - f_ops->compat_ioctl = snd_disconnect_ioctl; -#endif - f_ops->mmap = snd_disconnect_mmap; - f_ops->fasync = snd_disconnect_fasync; + mfile->disconnected_f_op = mfile->file->f_op; - s_f_ops->next = card->s_f_ops; - card->s_f_ops = s_f_ops; - - f_ops = fops_get(f_ops); + spin_lock(&shutdown_lock); + list_add(&mfile->shutdown_list, &shutdown_files); + spin_unlock(&shutdown_lock); - old_f_ops = file->f_op; - file->f_op = f_ops; /* must be atomic */ - fops_put(old_f_ops); + fops_get(&snd_shutdown_f_ops); + mfile->file->f_op = &snd_shutdown_f_ops; mfile = mfile->next; } @@ -326,8 +338,6 @@ EXPORT_SYMBOL(snd_card_disconnect); */ static int snd_card_do_free(struct snd_card *card) { - struct snd_shutdown_f_ops *s_f_ops; - #if defined(CONFIG_SND_MIXER_OSS) || defined(CONFIG_SND_MIXER_OSS_MODULE) if (snd_mixer_oss_notify_callback) snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_FREE); @@ -351,11 +361,6 @@ static int snd_card_do_free(struct snd_card *card) snd_printk(KERN_WARNING "unable to free card info\n"); /* Not fatal error */ } - while (card->s_f_ops) { - s_f_ops = card->s_f_ops; - card->s_f_ops = s_f_ops->next; - kfree(s_f_ops); - } kfree(card); return 0; } @@ -670,6 +675,7 @@ int snd_card_file_add(struct snd_card *card, struct file *file) if (mfile == NULL) return -ENOMEM; mfile->file = file; + mfile->disconnected_f_op = NULL; mfile->next = NULL; spin_lock(&card->files_lock); if (card->shutdown) { @@ -716,6 +722,12 @@ int snd_card_file_remove(struct snd_card *card, struct file *file) pfile = mfile; mfile = mfile->next; } + if (mfile && mfile->disconnected_f_op) { + fops_put(mfile->disconnected_f_op); + spin_lock(&shutdown_lock); + list_del(&mfile->shutdown_list); + spin_unlock(&shutdown_lock); + } if (card->files == NULL) last_close = 1; spin_unlock(&card->files_lock); diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c index 2398d2c55feb..725c115ff97d 100644 --- a/sound/isa/es18xx.c +++ b/sound/isa/es18xx.c @@ -2154,6 +2154,7 @@ static int __devinit snd_audiodrive_pnpc(int dev, struct snd_audiodrive *acard, } /* Control port initialization */ if (pnp_activate_dev(acard->devc) < 0) { + kfree(cfg); snd_printk(KERN_ERR PFX "PnP control configure failure (out of resources?)\n"); return -EAGAIN; } diff --git a/sound/isa/gus/gusmax.c b/sound/isa/gus/gusmax.c index 52498c9d411e..c1c69e3cbfd0 100644 --- a/sound/isa/gus/gusmax.c +++ b/sound/isa/gus/gusmax.c @@ -107,7 +107,7 @@ static int __init snd_gusmax_detect(struct snd_gus_card * gus) static irqreturn_t snd_gusmax_interrupt(int irq, void *dev_id) { - struct snd_gusmax *maxcard = (struct snd_gusmax *) dev_id; + struct snd_gusmax *maxcard = dev_id; int loop, max = 5; int handled = 0; diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c index 5c474b831edc..f12cd09d1fcc 100644 --- a/sound/isa/gus/interwave.c +++ b/sound/isa/gus/interwave.c @@ -301,7 +301,7 @@ static int __devinit snd_interwave_detect(struct snd_interwave *iwcard, static irqreturn_t snd_interwave_interrupt(int irq, void *dev_id) { - struct snd_interwave *iwcard = (struct snd_interwave *) dev_id; + struct snd_interwave *iwcard = dev_id; int loop, max = 5; int handled = 0; diff --git a/sound/oss/es1371.c b/sound/oss/es1371.c index 2562f4769b90..ddf6b0a0bca5 100644 --- a/sound/oss/es1371.c +++ b/sound/oss/es1371.c @@ -1102,7 +1102,7 @@ static void es1371_handle_midi(struct es1371_state *s) static irqreturn_t es1371_interrupt(int irq, void *dev_id) { - struct es1371_state *s = (struct es1371_state *)dev_id; + struct es1371_state *s = dev_id; unsigned int intsrc, sctl; /* fastpath out, to ease interrupt sharing */ diff --git a/sound/oss/hal2.c b/sound/oss/hal2.c index 7807abac0625..784bdd707055 100644 --- a/sound/oss/hal2.c +++ b/sound/oss/hal2.c @@ -372,7 +372,7 @@ static void hal2_adc_interrupt(struct hal2_codec *adc) static irqreturn_t hal2_interrupt(int irq, void *dev_id) { - struct hal2_card *hal2 = (struct hal2_card*)dev_id; + struct hal2_card *hal2 = dev_id; irqreturn_t ret = IRQ_NONE; /* decide what caused this interrupt */ diff --git a/sound/oss/i810_audio.c b/sound/oss/i810_audio.c index a48af879b466..240cc7939b69 100644 --- a/sound/oss/i810_audio.c +++ b/sound/oss/i810_audio.c @@ -1525,7 +1525,7 @@ static void i810_channel_interrupt(struct i810_card *card) static irqreturn_t i810_interrupt(int irq, void *dev_id) { - struct i810_card *card = (struct i810_card *)dev_id; + struct i810_card *card = dev_id; u32 status; spin_lock(&card->lock); diff --git a/sound/oss/mpu401.c b/sound/oss/mpu401.c index 58d4a5d05a27..e96220541971 100644 --- a/sound/oss/mpu401.c +++ b/sound/oss/mpu401.c @@ -435,7 +435,7 @@ static void mpu401_input_loop(struct mpu_config *devc) static irqreturn_t mpuintr(int irq, void *dev_id) { struct mpu_config *devc; - int dev = (int) dev_id; + int dev = (int)(unsigned long) dev_id; int handled = 0; devc = &dev_conf[dev]; diff --git a/sound/oss/vidc.c b/sound/oss/vidc.c index 8932d89408ef..bb4a0969f461 100644 --- a/sound/oss/vidc.c +++ b/sound/oss/vidc.c @@ -372,7 +372,7 @@ static void vidc_audio_trigger(int dev, int enable_bits) adev->flags |= DMA_ACTIVE; dma_interrupt = vidc_audio_dma_interrupt; - vidc_sound_dma_irq(0, NULL, NULL); + vidc_sound_dma_irq(0, NULL); iomd_writeb(DMA_CR_E | 0x10, IOMD_SD0CR); local_irq_restore(flags); diff --git a/sound/oss/vwsnd.c b/sound/oss/vwsnd.c index 0cd4d6ec9862..6dfb9f4b03ec 100644 --- a/sound/oss/vwsnd.c +++ b/sound/oss/vwsnd.c @@ -2235,7 +2235,7 @@ static void vwsnd_audio_write_intr(vwsnd_dev_t *devc, unsigned int status) static irqreturn_t vwsnd_audio_intr(int irq, void *dev_id) { - vwsnd_dev_t *devc = (vwsnd_dev_t *) dev_id; + vwsnd_dev_t *devc = dev_id; unsigned int status; DBGEV("(irq=%d, dev_id=0x%p)\n", irq, dev_id); diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index dc28b111a06d..15be6ba87c82 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -530,7 +530,7 @@ AC97_ENUM("ALC Headphone Mux", wm9711_enum[1]), AC97_SINGLE("ALC Headphone Volume", AC97_VIDEO, 7, 7, 1), AC97_SINGLE("Out3 Switch", AC97_AUX, 15, 1, 1), -AC97_SINGLE("Out3 ZC Switch", AC97_AUX, 7, 1, 1), +AC97_SINGLE("Out3 ZC Switch", AC97_AUX, 7, 1, 0), AC97_ENUM("Out3 Mux", wm9711_enum[2]), AC97_ENUM("Out3 LR Mux", wm9711_enum[3]), AC97_SINGLE("Out3 Volume", AC97_AUX, 0, 31, 1), @@ -575,13 +575,14 @@ AC97_SINGLE("Playback Attenuate (-6dB) Switch", AC97_MASTER_TONE, 6, 1, 0), AC97_SINGLE("ADC Switch", AC97_REC_GAIN, 15, 1, 1), AC97_ENUM("Capture Volume Steps", wm9711_enum[6]), -AC97_DOUBLE("Capture Volume", AC97_REC_GAIN, 8, 0, 15, 1), +AC97_DOUBLE("Capture Volume", AC97_REC_GAIN, 8, 0, 63, 1), AC97_SINGLE("Capture ZC Switch", AC97_REC_GAIN, 7, 1, 0), AC97_SINGLE("Mic 1 to Phone Switch", AC97_MIC, 14, 1, 1), AC97_SINGLE("Mic 2 to Phone Switch", AC97_MIC, 13, 1, 1), AC97_ENUM("Mic Select Source", wm9711_enum[7]), -AC97_SINGLE("Mic 1 Volume", AC97_MIC, 8, 32, 1), +AC97_SINGLE("Mic 1 Volume", AC97_MIC, 8, 31, 1), +AC97_SINGLE("Mic 2 Volume", AC97_MIC, 0, 31, 1), AC97_SINGLE("Mic 20dB Boost Switch", AC97_MIC, 7, 1, 0), AC97_SINGLE("Master ZC Switch", AC97_MASTER, 7, 1, 0), diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c index ef189d7f09d3..6ed5ad59f5b5 100644 --- a/sound/pci/au88x0/au88x0.c +++ b/sound/pci/au88x0/au88x0.c @@ -128,6 +128,7 @@ static int snd_vortex_dev_free(struct snd_device *device) // Take down PCI interface. synchronize_irq(vortex->irq); free_irq(vortex->irq, vortex); + iounmap(vortex->mmio); pci_release_regions(vortex->pci_dev); pci_disable_device(vortex->pci_dev); kfree(vortex); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index be65d4db8e27..8058059c56e9 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1461,8 +1461,8 @@ void snd_emu10k1_resume_regs(struct snd_emu10k1 *emu) /* resore for spdif */ if (emu->audigy) - outl(emu->port + A_IOCFG, emu->saved_a_iocfg); - outl(emu->port + HCFG, emu->saved_hcfg); + outl(emu->saved_a_iocfg, emu->port + A_IOCFG); + outl(emu->saved_hcfg, emu->port + HCFG); val = emu->saved_ptr; for (reg = saved_regs; *reg != 0xff; reg++) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index a76a778d0a1f..feeed12920b4 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1682,6 +1682,7 @@ static struct pci_device_id azx_ids[] = { { 0x10b9, 0x5461, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ULI }, /* ULI M5461 */ { 0x10de, 0x026c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_NVIDIA }, /* NVIDIA 026c */ { 0x10de, 0x0371, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_NVIDIA }, /* NVIDIA 0371 */ + { 0x10de, 0x03f0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_NVIDIA }, /* NVIDIA 03f0 */ { 0, } }; MODULE_DEVICE_TABLE(pci, azx_ids); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d08d2e399c8f..84a3eb8aacc2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5076,6 +5076,10 @@ static struct hda_board_config alc883_cfg_tbl[] = { { .modelname = "acer", .config = ALC883_ACER }, { .pci_subvendor = 0x1025, .pci_subdevice = 0/*0x0102*/, .config = ALC883_ACER }, + { .pci_subvendor = 0x1025, .pci_subdevice = 0x0102, + .config = ALC883_ACER }, + { .pci_subvendor = 0x1025, .pci_subdevice = 0x009f, + .config = ALC883_ACER }, { .modelname = "auto", .config = ALC883_AUTO }, {} }; diff --git a/sound/pci/hda/patch_si3054.c b/sound/pci/hda/patch_si3054.c index 76ec3d75fa9e..cc87dff1eb56 100644 --- a/sound/pci/hda/patch_si3054.c +++ b/sound/pci/hda/patch_si3054.c @@ -297,8 +297,13 @@ static int patch_si3054(struct hda_codec *codec) struct hda_codec_preset snd_hda_preset_si3054[] = { { .id = 0x163c3055, .name = "Si3054", .patch = patch_si3054 }, { .id = 0x163c3155, .name = "Si3054", .patch = patch_si3054 }, + { .id = 0x11c11040, .name = "Si3054", .patch = patch_si3054 }, { .id = 0x11c13026, .name = "Si3054", .patch = patch_si3054 }, + { .id = 0x11c13055, .name = "Si3054", .patch = patch_si3054 }, + { .id = 0x11c13155, .name = "Si3054", .patch = patch_si3054 }, + { .id = 0x10573055, .name = "Si3054", .patch = patch_si3054 }, { .id = 0x10573057, .name = "Si3054", .patch = patch_si3054 }, + { .id = 0x10573155, .name = "Si3054", .patch = patch_si3054 }, {} }; diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index 398aa10a06e8..fa8cd8cecc21 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -1124,9 +1124,6 @@ static irqreturn_t snd_korg1212_interrupt(int irq, void *dev_id) u32 doorbellValue; struct snd_korg1212 *korg1212 = dev_id; - if(irq != korg1212->irq) - return IRQ_NONE; - doorbellValue = readl(korg1212->inDoorbellPtr); if (!doorbellValue) @@ -1140,7 +1137,6 @@ static irqreturn_t snd_korg1212_interrupt(int irq, void *dev_id) korg1212->inIRQ++; - switch (doorbellValue) { case K1212_DB_DSPDownloadDone: K1212_DEBUG_PRINTK("K1212_DEBUG: IRQ DNLD count - %ld, %x, [%s].\n", diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf_irq.c b/sound/pcmcia/pdaudiocf/pdaudiocf_irq.c index 732263e4a437..5bd69206ba65 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf_irq.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf_irq.c @@ -22,6 +22,7 @@ #include <sound/core.h> #include "pdaudiocf.h" #include <sound/initval.h> +#include <asm/irq_regs.h> /* * diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index e662281a751a..367f8a32a665 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -322,7 +322,7 @@ static void i_usX2Y_urb_complete(struct urb *urb) usX2Y_error_urb_status(usX2Y, subs, urb); return; } - if (likely((0xFFFF & urb->start_frame) == usX2Y->wait_iso_frame)) + if (likely(urb->start_frame == usX2Y->wait_iso_frame)) subs->completed_urb = urb; else { usX2Y_error_sequence(usX2Y, subs, urb); @@ -335,13 +335,9 @@ static void i_usX2Y_urb_complete(struct urb *urb) atomic_read(&capsubs->state) >= state_PREPARED && (playbacksubs->completed_urb || atomic_read(&playbacksubs->state) < state_PREPARED)) { - if (!usX2Y_usbframe_complete(capsubs, playbacksubs, urb->start_frame)) { - if (nr_of_packs() <= urb->start_frame && - urb->start_frame <= (2 * nr_of_packs() - 1)) // uhci and ohci - usX2Y->wait_iso_frame = urb->start_frame - nr_of_packs(); - else - usX2Y->wait_iso_frame += nr_of_packs(); - } else { + if (!usX2Y_usbframe_complete(capsubs, playbacksubs, urb->start_frame)) + usX2Y->wait_iso_frame += nr_of_packs(); + else { snd_printdd("\n"); usX2Y_clients_stop(usX2Y); } @@ -495,7 +491,6 @@ static int usX2Y_urbs_start(struct snd_usX2Y_substream *subs) if (subs != NULL && atomic_read(&subs->state) >= state_PREPARED) goto start; } - usX2Y->wait_iso_frame = -1; start: usX2Y_subs_startup(subs); @@ -516,10 +511,9 @@ static int usX2Y_urbs_start(struct snd_usX2Y_substream *subs) snd_printk (KERN_ERR "cannot submit datapipe for urb %d, err = %d\n", i, err); err = -EPIPE; goto cleanup; - } else { - if (0 > usX2Y->wait_iso_frame) + } else + if (i == 0) usX2Y->wait_iso_frame = urb->start_frame; - } urb->transfer_flags = 0; } else { atomic_set(&subs->state, state_STARTING1); diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index 9acef9d90543..8f3e35e24e72 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -243,7 +243,7 @@ static void i_usX2Y_usbpcm_urb_complete(struct urb *urb) usX2Y_error_urb_status(usX2Y, subs, urb); return; } - if (likely((0xFFFF & urb->start_frame) == usX2Y->wait_iso_frame)) + if (likely(urb->start_frame == usX2Y->wait_iso_frame)) subs->completed_urb = urb; else { usX2Y_error_sequence(usX2Y, subs, urb); @@ -256,13 +256,9 @@ static void i_usX2Y_usbpcm_urb_complete(struct urb *urb) if (capsubs->completed_urb && atomic_read(&capsubs->state) >= state_PREPARED && (NULL == capsubs2 || capsubs2->completed_urb) && (playbacksubs->completed_urb || atomic_read(&playbacksubs->state) < state_PREPARED)) { - if (!usX2Y_usbpcm_usbframe_complete(capsubs, capsubs2, playbacksubs, urb->start_frame)) { - if (nr_of_packs() <= urb->start_frame && - urb->start_frame <= (2 * nr_of_packs() - 1)) // uhci and ohci - usX2Y->wait_iso_frame = urb->start_frame - nr_of_packs(); - else - usX2Y->wait_iso_frame += nr_of_packs(); - } else { + if (!usX2Y_usbpcm_usbframe_complete(capsubs, capsubs2, playbacksubs, urb->start_frame)) + usX2Y->wait_iso_frame += nr_of_packs(); + else { snd_printdd("\n"); usX2Y_clients_stop(usX2Y); } @@ -433,7 +429,6 @@ static int usX2Y_usbpcm_urbs_start(struct snd_usX2Y_substream *subs) if (subs != NULL && atomic_read(&subs->state) >= state_PREPARED) goto start; } - usX2Y->wait_iso_frame = -1; start: usX2Y_usbpcm_subs_startup(subs); @@ -459,7 +454,7 @@ static int usX2Y_usbpcm_urbs_start(struct snd_usX2Y_substream *subs) goto cleanup; } else { snd_printdd("%i\n", urb->start_frame); - if (0 > usX2Y->wait_iso_frame) + if (u == 0) usX2Y->wait_iso_frame = urb->start_frame; } urb->transfer_flags = 0; @@ -632,7 +627,7 @@ static int usX2Y_pcms_lock_check(struct snd_card *card) for (s = 0; s < 2; ++s) { struct snd_pcm_substream *substream; substream = pcm->streams[s].substream; - if (SUBSTREAM_BUSY(substream)) + if (substream && SUBSTREAM_BUSY(substream)) err = -EBUSY; } } |