diff options
339 files changed, 5563 insertions, 4264 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 99983e67c13c..da95513571ea 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -162,7 +162,7 @@ Description: Discover CPUs in the same CPU frequency coordination domain What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1} Date: August 2008 KernelVersion: 2.6.27 -Contact: discuss@x86-64.org +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> Description: Disable L3 cache indices These files exist in every CPU's cache/index3 directory. Each diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401 index 8eb88e974055..711f75e189eb 100644 --- a/Documentation/hwmon/tmp401 +++ b/Documentation/hwmon/tmp401 @@ -20,7 +20,7 @@ Supported chips: Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html * Texas Instruments TMP435 Prefix: 'tmp435' - Addresses scanned: I2C 0x37, 0x48 - 0x4f + Addresses scanned: I2C 0x48 - 0x4f Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp435.html Authors: diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 61ab1628a057..a320a41e7412 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -746,6 +746,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system + cpu_init_udelay=N + [X86] Delay for N microsec between assert and de-assert + of APIC INIT to start processors. This delay occurs + on every CPU online, such as boot, and resume from suspend. + Default: 10000 + cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver Format: <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>] diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt index 43e94ea6d2ca..263b907517ac 100644 --- a/Documentation/target/tcmu-design.txt +++ b/Documentation/target/tcmu-design.txt @@ -15,8 +15,7 @@ Contents: a) Discovering and configuring TCMU uio devices b) Waiting for events on the device(s) c) Managing the command ring -3) Command filtering and pass_level -4) A final note +3) A final note TCM Userspace Design @@ -324,7 +323,7 @@ int handle_device_events(int fd, void *map) /* Process events from cmd ring until we catch up with cmd_head */ while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) { - if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) { + if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) { uint8_t *cdb = (void *)mb + ent->req.cdb_off; bool success = true; @@ -339,8 +338,12 @@ int handle_device_events(int fd, void *map) ent->rsp.scsi_status = SCSI_CHECK_CONDITION; } } + else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) { + /* Tell the kernel we didn't handle unknown opcodes */ + ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP; + } else { - /* Do nothing for PAD entries */ + /* Do nothing for PAD entries except update cmd_tail */ } /* update cmd_tail */ @@ -360,28 +363,6 @@ int handle_device_events(int fd, void *map) } -Command filtering and pass_level --------------------------------- - -TCMU supports a "pass_level" option with valid values of 0 or 1. When -the value is 0 (the default), nearly all SCSI commands received for -the device are passed through to the handler. This allows maximum -flexibility but increases the amount of code required by the handler, -to support all mandatory SCSI commands. If pass_level is set to 1, -then only IO-related commands are presented, and the rest are handled -by LIO's in-kernel command emulation. The commands presented at level -1 include all versions of: - -READ -WRITE -WRITE_VERIFY -XDWRITEREAD -WRITE_SAME -COMPARE_AND_WRITE -SYNCHRONIZE_CACHE -UNMAP - - A final note ------------ diff --git a/MAINTAINERS b/MAINTAINERS index 474bcb6c0bac..e30871880fdb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2427,7 +2427,6 @@ L: linux-security-module@vger.kernel.org S: Supported F: include/linux/capability.h F: include/uapi/linux/capability.h -F: security/capability.c F: security/commoncap.c F: kernel/capability.c @@ -10588,8 +10587,7 @@ F: drivers/virtio/virtio_input.c F: include/uapi/linux/virtio_input.h VIA RHINE NETWORK DRIVER -M: Roger Luethi <rl@hellgate.ch> -S: Maintained +S: Orphan F: drivers/net/ethernet/via/via-rhine.c VIA SD/MMC CARD CONTROLLER DRIVER @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile index cd143887380a..8399bd0e68e8 100644 --- a/arch/alpha/boot/Makefile +++ b/arch/alpha/boot/Makefile @@ -14,6 +14,9 @@ targets := vmlinux.gz vmlinux \ tools/bootpzh bootloader bootpheader bootpzheader OBJSTRIP := $(obj)/tools/objstrip +HOSTCFLAGS := -Wall -I$(objtree)/usr/include +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) + # SRM bootable image. Copy to offset 512 of a partition. $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ @@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE $(call if_changed,objstrip) -LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootpheader := -static -T # -N -relax +LDFLAGS_bootpzheader := -static -T # -N -relax -OBJ_bootlx := $(obj)/head.o $(obj)/main.o -OBJ_bootph := $(obj)/head.o $(obj)/bootp.o -OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o +OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o +OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o +OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE $(call if_changed,ld) diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 3baf2d1e908d..dd6eb4a33582 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -19,7 +19,6 @@ #include "ksize.h" -extern int vsprintf(char *, const char *, va_list); extern unsigned long switch_to_osf_pal(unsigned long nr, struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, unsigned long *vptb); diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c new file mode 100644 index 000000000000..f844dae8a54a --- /dev/null +++ b/arch/alpha/boot/stdio.c @@ -0,0 +1,306 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <stdarg.h> +#include <stddef.h> + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +# define do_div(n, base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + __rem = ((unsigned long long)(n)) % __base; \ + (n) = ((unsigned long long)(n)) / __base; \ + __rem; \ +}) + + +static int skip_atoi(const char **s) +{ + int i, c; + + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, unsigned long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if ((signed long long)num < 0) { + sign = '-'; + num = - (signed long long)num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) { + tmp[i++] = digits[do_div(num, base)]; + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if ('0' <= *fmt && *fmt <= '9') + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if ('0' <= *fmt && *fmt <= '9') + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'l' && *(fmt + 1) == 'l') { + qualifier = 'q'; + fmt += 2; + } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' + || *fmt == 'Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = "<NULL>"; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'q') { + num = va_arg(args, unsigned long long); + if (flags & SIGN) + num = (signed long long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c index 367d53d031fc..dee82695f48b 100644 --- a/arch/alpha/boot/tools/objstrip.c +++ b/arch/alpha/boot/tools/objstrip.c @@ -27,6 +27,9 @@ #include <linux/param.h> #ifdef __ELF__ # include <linux/elf.h> +# define elfhdr elf64_hdr +# define elf_phdr elf64_phdr +# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) #endif /* bootfile size must be multiple of BLOCK_SIZE: */ diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index f61e1a56c378..4cb4b6d3452c 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -2,6 +2,5 @@ #define _ALPHA_TYPES_H #include <asm-generic/int-ll64.h> -#include <uapi/asm/types.h> #endif /* _ALPHA_TYPES_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index c509d306db45..a56e608db2f9 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,7 +3,7 @@ #include <uapi/asm/unistd.h> -#define NR_SYSCALLS 511 +#define NR_SYSCALLS 514 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index d214a0358100..aa33bf5aacb6 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -472,5 +472,8 @@ #define __NR_sched_setattr 508 #define __NR_sched_getattr 509 #define __NR_renameat2 510 +#define __NR_getrandom 511 +#define __NR_memfd_create 512 +#define __NR_execveat 513 #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 253cf1a87481..51267ac5729b 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -6,7 +6,6 @@ * Error handling code supporting Alpha systems */ -#include <linux/init.h> #include <linux/sched.h> #include <asm/io.h> diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 7b2be251c30f..51f2c8654253 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -19,7 +19,6 @@ #include <linux/ptrace.h> #include <linux/interrupt.h> #include <linux/random.h> -#include <linux/init.h> #include <linux/irq.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index e51f578636a5..36dc91ace83a 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, if (tv) { if (get_tv32((struct timeval *)&kts, tv)) return -EFAULT; + kts.tv_nsec *= 1000; } if (tz) { if (copy_from_user(&ktz, tz, sizeof(*tz))) return -EFAULT; } - kts.tv_nsec *= 1000; - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 1941a07b5811..84d13263ce46 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task) } /* - * Copy an alpha thread.. + * Copy architecture-specific thread state */ - int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, + unsigned long kthread_arg, struct task_struct *p) { extern void ret_from_fork(void); @@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, sizeof(struct switch_stack) + sizeof(struct pt_regs)); childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ - childstack->r10 = arg; + childstack->r10 = kthread_arg; childregs->hae = alpha_mv.hae_cache, childti->pcb.usp = 0; return 0; diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 99ac36d5de4e..2f24447fef92 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -63,7 +63,6 @@ static struct { enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, }; @@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier) return -EINVAL; } - static void send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) { @@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs) generic_smp_call_function_interrupt(); break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case IPI_CPU_STOP: halt(); @@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); } static void diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 6f01d9ad7b81..72b59511e59a 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -237,8 +237,7 @@ srmcons_init(void) return -ENODEV; } - -module_init(srmcons_init); +device_initcall(srmcons_init); /* diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index f21d61fab678..24e41bd7d3c9 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); irq = intline; - msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI); + msi_loc = dev->msi_cap; msg_ctl = 0; if (msi_loc) pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl); diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 24789713f1ea..9b62e3fd4f03 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -529,6 +529,9 @@ sys_call_table: .quad sys_sched_setattr .quad sys_sched_getattr .quad sys_renameat2 /* 510 */ + .quad sys_getrandom + .quad sys_memfd_create + .quad sys_execveat .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 9c4c189eb22f..74aceead06e9 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -14,7 +14,6 @@ #include <linux/tty.h> #include <linux/delay.h> #include <linux/module.h> -#include <linux/init.h> #include <linux/kallsyms.h> #include <linux/ratelimit.h> diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c index 18aa9b4f94f1..086a0d5445c5 100644 --- a/arch/alpha/oprofile/op_model_ev4.c +++ b/arch/alpha/oprofile/op_model_ev4.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c index c32f8a0ad925..c300f5ef3482 100644 --- a/arch/alpha/oprofile/op_model_ev5.c +++ b/arch/alpha/oprofile/op_model_ev5.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c index 1c84cc257fc7..02edf5971614 100644 --- a/arch/alpha/oprofile/op_model_ev6.c +++ b/arch/alpha/oprofile/op_model_ev6.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c index 34a57a126553..adb1744d20f3 100644 --- a/arch/alpha/oprofile/op_model_ev67.c +++ b/arch/alpha/oprofile/op_model_ev67.c @@ -9,7 +9,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 86217db2937a..992736b5229b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \ imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \ imx25-karo-tx25.dtb \ imx25-pdk.dtb -dtb-$(CONFIG_SOC_IMX31) += \ +dtb-$(CONFIG_SOC_IMX27) += \ imx27-apf27.dtb \ imx27-apf27dev.dtb \ imx27-eukrea-mbimxsd27-baseboard.dtb \ diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts index 5c42d259fa68..901739fcb85a 100644 --- a/arch/arm/boot/dts/am335x-boneblack.dts +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -80,7 +80,3 @@ status = "okay"; }; }; - -&rtc { - system-power-controller; -}; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 87fc7a35e802..156d05efcb70 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -654,7 +654,7 @@ wlcore: wlcore@2 { compatible = "ti,wl1271"; reg = <2>; - interrupt-parent = <&gpio1>; + interrupt-parent = <&gpio0>; interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */ ref-clock-frequency = <38400000>; }; diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts index 173ffa479ad3..792394dd0f2a 100644 --- a/arch/arm/boot/dts/exynos4412-trats2.dts +++ b/arch/arm/boot/dts/exynos4412-trats2.dts @@ -736,7 +736,7 @@ display-timings { timing-0 { - clock-frequency = <0>; + clock-frequency = <57153600>; hactive = <720>; vactive = <1280>; hfront-porch = <5>; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index 6951b66d1ab7..bc215e4b75fd 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -533,7 +533,7 @@ fec: ethernet@1002b000 { compatible = "fsl,imx27-fec"; - reg = <0x1002b000 0x4000>; + reg = <0x1002b000 0x1000>; interrupts = <50>; clocks = <&clks IMX27_CLK_FEC_IPG_GATE>, <&clks IMX27_CLK_FEC_AHB_GATE>; diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index 134d3f27a8ec..921de6605f07 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -110,6 +110,8 @@ nand@0,0 { reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ nand-bus-width = <16>; + gpmc,device-width = <2>; + ti,nand-ecc-opt = "sw"; gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 0ca4a3eaf65d..fbbb1915c6a9 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -429,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_STI=y CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_ISP1760=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_STI=y CONFIG_USB_OHCI_HCD_PLATFORM=y diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f8ccc21fa032..4e7f40c577e6 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -33,7 +33,9 @@ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq @ disable interrupts - ldr r1, [tsk, #TI_FLAGS] + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return tst r1, #_TIF_WORK_MASK bne fast_work_pending asm_trace_hardirqs_on diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 213919ba326f..3b8c2833c537 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -304,16 +304,17 @@ static int probe_current_pmu(struct arm_pmu *pmu) static int of_pmu_irq_cfg(struct platform_device *pdev) { int i, irq; - int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - - if (!irqs) - return -ENOMEM; + int *irqs; /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) return 0; + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + for (i = 0; i < pdev->num_resources; ++i) { struct device_node *dn; int cpu; diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 4d60005e9277..6d0893a3828e 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void) struct device_node *np; np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc"); - if (WARN_ON(!np || - !of_find_property(np, "interrupt-controller", NULL))) - pr_warn("Outdated DT detected, system is about to crash!!!\n"); + if (WARN_ON(!np)) + return; + + if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { + pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + + /* map GPC, so that at least CPUidle and WARs keep working */ + gpc_base = of_iomap(np, 0); + } } #ifdef CONFIG_PM_GENERIC_DOMAINS @@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev) struct regulator *pu_reg; int ret; + /* bail out if DT too old and doesn't provide the necessary info */ + if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells")) + return 0; + pu_reg = devm_regulator_get_optional(&pdev->dev, "pu"); if (PTR_ERR(pu_reg) == -ENODEV) pu_reg = NULL; diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index f1aeb54fabe3..2385052b0ce1 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -107,7 +107,7 @@ static int cplds_probe(struct platform_device *pdev) struct resource *res; struct cplds *fpga; int ret; - unsigned int base_irq = 0; + int base_irq; unsigned long irqflags = 0; fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4e6ef896c619..7186382672b5 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void) } /* - * Find the first non-section-aligned page, and point + * Find the first non-pmd-aligned page, and point * memblock_limit at it. This relies on rounding the - * limit down to be section-aligned, which happens at - * the end of this function. + * limit down to be pmd-aligned, which happens at the + * end of this function. * * With this algorithm, the start or end of almost any - * bank can be non-section-aligned. The only exception - * is that the start of the bank 0 must be section- + * bank can be non-pmd-aligned. The only exception is + * that the start of the bank 0 must be section- * aligned, since otherwise memory would need to be * allocated when mapping the start of bank 0, which * occurs before any free memory is mapped. */ if (!memblock_limit) { - if (!IS_ALIGNED(block_start, SECTION_SIZE)) + if (!IS_ALIGNED(block_start, PMD_SIZE)) memblock_limit = block_start; - else if (!IS_ALIGNED(block_end, SECTION_SIZE)) + else if (!IS_ALIGNED(block_end, PMD_SIZE)) memblock_limit = arm_lowmem_limit; } @@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void) high_memory = __va(arm_lowmem_limit - 1) + 1; /* - * Round the memblock limit down to a section size. This + * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the - * last full section, which should be mapped. + * last full pmd, which should be mapped. */ if (memblock_limit) - memblock_limit = round_down(memblock_limit, SECTION_SIZE); + memblock_limit = round_down(memblock_limit, PMD_SIZE); if (!memblock_limit) memblock_limit = arm_lowmem_limit; diff --git a/arch/ia64/include/asm/irq_remapping.h b/arch/ia64/include/asm/irq_remapping.h index e3b3556e2e1b..a8687b1d8906 100644 --- a/arch/ia64/include/asm/irq_remapping.h +++ b/arch/ia64/include/asm/irq_remapping.h @@ -1,6 +1,4 @@ #ifndef __IA64_INTR_REMAPPING_H #define __IA64_INTR_REMAPPING_H #define irq_remapping_enabled 0 -#define dmar_alloc_hwirq create_irq -#define dmar_free_hwirq destroy_irq #endif diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 9dd7464f8c17..d70bf15c690a 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -165,7 +165,7 @@ static struct irq_chip dmar_msi_type = { .irq_retrigger = ia64_msi_retrigger_irq, }; -static int +static void msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { struct irq_cfg *cfg = irq_cfg + irq; @@ -186,21 +186,29 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) MSI_DATA_LEVEL_ASSERT | MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(cfg->vector); - return 0; } -int arch_setup_dmar_msi(unsigned int irq) +int dmar_alloc_hwirq(int id, int node, void *arg) { - int ret; + int irq; struct msi_msg msg; - ret = msi_compose_msg(NULL, irq, &msg); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; + irq = create_irq(); + if (irq > 0) { + irq_set_handler_data(irq, arg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, + handle_edge_irq, "edge"); + msi_compose_msg(NULL, irq, &msg); + dmar_msi_write(irq, &msg); + } + + return irq; +} + +void dmar_free_hwirq(int irq) +{ + irq_set_handler_data(irq, NULL); + destroy_irq(irq); } #endif /* CONFIG_INTEL_IOMMU */ diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index d4e162d35b34..7cc3be9fa7c6 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_controller *controller = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, controller->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_controller *controller = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, controller->companion); + } return 0; } diff --git a/arch/mips/ath79/prom.c b/arch/mips/ath79/prom.c index e1fe63051136..597899ad5438 100644 --- a/arch/mips/ath79/prom.c +++ b/arch/mips/ath79/prom.c @@ -1,6 +1,7 @@ /* * Atheros AR71XX/AR724X/AR913X specific prom routines * + * Copyright (C) 2015 Laurent Fasnacht <l@libres.ch> * Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org> * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org> * @@ -25,12 +26,14 @@ void __init prom_init(void) { fw_init_cmdline(); +#ifdef CONFIG_BLK_DEV_INITRD /* Read the initrd address from the firmware environment */ initrd_start = fw_getenvl("initrd_start"); if (initrd_start) { initrd_start = KSEG0ADDR(initrd_start); initrd_end = initrd_start + fw_getenvl("initrd_size"); } +#endif } void __init prom_free_prom_memory(void) diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 002680648dcb..b2a577ebce0b 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m CONFIG_USB_C67X00_HCD=m CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_ISP1760_HCD=m +CONFIG_USB_ISP1760=m CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=m CONFIG_USB_R8A66597_HCD=m diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index d2bfbc2e8995..51f57d841662 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -29,7 +29,7 @@ int kgdb_early_setup; #endif -static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; +static DECLARE_BITMAP(irq_map, NR_IRQS); int allocate_irqno(void) { diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index fd528d7ea278..336708ae5c5b 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = { static void bmips_wr_vec(unsigned long dst, char *start, char *end) { memcpy((void *)dst, start, end - start); - dma_cache_wback((unsigned long)start, end - start); + dma_cache_wback(dst, end - start); local_flush_icache_range(dst, dst + (end - start)); instruction_hazard(); } diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 7d12c0dded3d..77e64942f004 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm) FEXPORT(__strnlen_\func\()_nocheck_asm) move v0, a0 PTR_ADDU a1, a0 # stop pointer -1: beq v0, a1, 1f # limit reached? +1: +#ifdef CONFIG_CPU_DADDI_WORKAROUNDS + .set noat + li AT, 1 +#endif + beq v0, a1, 1f # limit reached? .ifeqs "\func", "kernel" EX(lb, t0, (v0), .Lfault\@) .else @@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm) .endif .set noreorder bnez t0, 1b -1: PTR_ADDIU v0, 1 +1: +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS + PTR_ADDIU v0, 1 +#else + PTR_ADDU v0, AT + .set at +#endif .set reorder PTR_SUBU v0, a0 jr ra diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6e424d185d0..a6cfdabb6054 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,7 +24,8 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - int core_id; + unsigned short sock_id; + unsigned short core_id; int proc_id; } cpuinfo_sparc; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index dc165ebdf05a..2a52c91d2c8a 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) " sllx %1, 32, %1\n" " or %0, %1, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sethi %%hi(%4), %0\n" + " .word 662b\n" + " or %1, %%ulo(%4), %1\n" + " or %0, %%lo(%4), %0\n" + " .word 663b\n" + " sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .previous\n" : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V), + "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | + _PAGE_CP_4V | _PAGE_E_4V | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); @@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) " andn %0, %4, %0\n" " or %0, %5, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " andn %0, %6, %0\n" + " or %0, %5, %0\n" + " .previous\n" : "=r" (val) : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), - "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); + "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V), + "i" (_PAGE_CP_4V)); return __pgprot(val); } diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index ed8f071132e4..d1761df5cca6 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #ifdef CONFIG_SMP #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_map[NR_CPUS]; static inline const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index 6fd4436d32f0..ec9c04de3664 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry { }; extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, __sun4v_2insn_patch_end; +extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch, + __sun_m7_2insn_patch_end; #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 07cc49e541f4..0f679421b468 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *, + struct sun4v_2insn_patch_entry *); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 94e392bdee7d..814fb1729b12 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev) err = -ENOMEM; goto err1; } - memset(grpci2priv, 0, sizeof(*grpci2priv)); priv->regs = regs; priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */ priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT; diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 26c80e18d7b1..6f80936e0eea 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) } } -static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) +static void find_back_node_value(struct mdesc_handle *hp, u64 node, + char *srch_val, + void (*func)(struct mdesc_handle *, u64, int), + u64 val, int depth) { - u64 a; - - mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { - u64 t = mdesc_arc_target(hp, a); - const char *name; - const u64 *id; + u64 arc; - name = mdesc_node_name(hp, t); - if (!strcmp(name, "cpu")) { - id = mdesc_get_property(hp, t, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } else { - u64 j; + /* Since we have an estimate of recursion depth, do a sanity check. */ + if (depth == 0) + return; - mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { - u64 n = mdesc_arc_target(hp, j); - const char *n_name; + mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, arc); + const char *name = mdesc_node_name(hp, n); - n_name = mdesc_node_name(hp, n); - if (strcmp(n_name, "cpu")) - continue; + if (!strcmp(srch_val, name)) + (*func)(hp, n, val); - id = mdesc_get_property(hp, n, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } - } + find_back_node_value(hp, n, srch_val, func, val, depth-1); } } +static void __mark_core_id(struct mdesc_handle *hp, u64 node, + int core_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).core_id = core_id; +} + +static void __mark_sock_id(struct mdesc_handle *hp, u64 node, + int sock_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = sock_id; +} + +static void mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); +} + +static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, + int sock_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); +} + static void set_core_ids(struct mdesc_handle *hp) { int idx; u64 mp; idx = 1; + + /* Identify unique cores by looking for cpus backpointed to by + * level 1 instruction caches. + */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *level; const char *type; @@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp) continue; mark_core_ids(hp, mp, idx); + idx++; + } +} + +static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +{ + u64 mp; + int idx = 1; + int fnd = 0; + + /* Identify unique sockets by looking for cpus backpointed to by + * shared level n caches. + */ + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *cur_lvl; + + cur_lvl = mdesc_get_property(hp, mp, "level", NULL); + if (*cur_lvl != level) + continue; + + mark_sock_ids(hp, mp, idx); + idx++; + fnd = 1; + } + return fnd; +} + +static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp) +{ + int idx = 1; + mdesc_for_each_node_by_name(hp, mp, "socket") { + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 t = mdesc_arc_target(hp, a); + const char *name; + const u64 *id; + + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) + continue; + + id = mdesc_get_property(hp, t, "id", NULL); + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = idx; + } idx++; } } +static void set_sock_ids(struct mdesc_handle *hp) +{ + u64 mp; + + /* If machine description exposes sockets data use it. + * Otherwise fallback to use shared L3 or L2 caches. + */ + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); + if (mp != MDESC_NODE_NULL) + return set_sock_ids_by_socket(hp, mp); + + if (!set_sock_ids_by_cache(hp, 3)) + set_sock_ids_by_cache(hp, 2); +} + static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) { u64 a; @@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name) continue; mark_proc_ids(hp, mp, idx); - idx++; } } @@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask) set_core_ids(hp); set_proc_ids(hp); + set_sock_ids(hp); mdesc_release(hp); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 6f7251fd2eab..c928bc64b4ba 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1002,6 +1002,38 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); #ifdef CONFIG_SYSFS + +#define SLOT_NAME_SIZE 11 /* Max decimal digits + null in u32 */ + +static void pcie_bus_slot_names(struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + char name[SLOT_NAME_SIZE]; + struct pci_slot *pci_slot; + const u32 *slot_num; + int len; + + slot_num = of_get_property(pdev->dev.of_node, + "physical-slot#", &len); + + if (slot_num == NULL || len != 4) + continue; + + snprintf(name, sizeof(name), "%u", slot_num[0]); + pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL); + + if (IS_ERR(pci_slot)) + pr_err("PCI: pci_create_slot returned %ld.\n", + PTR_ERR(pci_slot)); + } + + list_for_each_entry(bus, &pbus->children, node) + pcie_bus_slot_names(bus); +} + static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) { const struct pci_slot_names { @@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void) while ((pbus = pci_find_next_bus(pbus)) != NULL) { struct device_node *node; + struct pci_dev *pdev; + + pdev = list_first_entry(&pbus->devices, struct pci_dev, + bus_list); - if (pbus->self) { - /* PCI->PCI bridge */ - node = pbus->self->dev.of_node; + if (pdev && pci_is_pcie(pdev)) { + pcie_bus_slot_names(pbus); } else { - struct pci_pbm_info *pbm = pbus->sysdata; - /* Host PCI controller */ - node = pbm->op->dev.of_node; - } + if (pbus->self) { + + /* PCI->PCI bridge */ + node = pbus->self->dev.of_node; + + } else { + struct pci_pbm_info *pbm = pbus->sysdata; - pci_bus_slot_names(node, pbus); + /* Host PCI controller */ + node = pbm->op->dev.of_node; + } + + pci_bus_slot_names(node, pbus); + } } return 0; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index c38d19fc27ba..f7b261749383 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start, + struct sun4v_2insn_patch_entry *end) +{ + while (start < end) { + unsigned long addr = start->addr; + + *(unsigned int *) (addr + 0) = start->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = start->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + start++; + } +} + static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -267,6 +285,9 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); + if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7) + sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, + &__sun_m7_2insn_patch_end); sun4v_hvapi_init(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 61139d9924ca..19cd08d18672 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); +EXPORT_SYMBOL(cpu_core_sib_map); static cpumask_t smp_commenced_mask; @@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void) } } + for_each_present_cpu(i) { + unsigned int j; + + for_each_present_cpu(j) { + if (cpu_data(i).sock_id == cpu_data(j).sock_id) + cpumask_set_cpu(j, &cpu_core_sib_map[i]); + } + } + for_each_present_cpu(i) { unsigned int j; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 09243057cb0b..f1a2f688b28a 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -138,6 +138,11 @@ SECTIONS *(.pause_3insn_patch) __pause_3insn_patch_end = .; } + .sun_m7_2insn_patch : { + __sun_m7_2insn_patch = .; + *(.sun_m7_2insn_patch) + __sun_m7_2insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ca0d6ba5ec8..559cb744112c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -54,6 +54,7 @@ #include "init_64.h" unsigned long kern_linear_pte_xor[4] __read_mostly; +static unsigned long page_cache4v_flag; /* A bitmap, two bits for every 256MB of physical memory. These two * bits determine what page size we use for kernel linear @@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void) static void __init sun4v_linear_pte_xor_finalize(void) { + unsigned long pagecv_flag; + + /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead + * enables MCD error. Do not set bit 9 on M7 processor. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + pagecv_flag = 0x00; + break; + default: + pagecv_flag = _PAGE_CV_4V; + break; + } #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; @@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; @@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; @@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void) return available; } +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + /* We need to exclude reserved regions. This exclusion will include * vmlinux and initrd. To be more precise the initrd size could be used to * compute a new lower limit because it is freed later during initialization. @@ -2034,6 +2055,25 @@ void __init paging_init(void) memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); #endif + /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde + * bit on M7 processor. This is a conflicting usage of the same + * bit. Enabling TTE.cv on M7 would turn on Memory Corruption + * Detection error on all pages and this will lead to problems + * later. Kernel does not run with MCD enabled and hence rest + * of the required steps to fully configure memory corruption + * detection are not taken. We need to ensure TTE.mcde is not + * set on M7 processor. Compute the value of cacheability + * flag for use later taking this into consideration. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + page_cache4v_flag = _PAGE_CP_4V; + break; + default: + page_cache4v_flag = _PAGE_CACHE_4V; + break; + } + if (tlb_type == hypervisor) sun4v_pgprot_init(); else @@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) -#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) -#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) -#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) -#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) -#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) - pgprot_t PAGE_KERNEL __read_mostly; EXPORT_SYMBOL(PAGE_KERNEL); @@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_P_4U | _PAGE_W_4U); if (tlb_type == hypervisor) pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V); pte_base |= _PAGE_PMD_HUGE; @@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void) int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | - _PAGE_CACHE_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | __ACCESS_BITS_4V | __DIRTY_BITS_4V | _PAGE_EXEC_4V); PAGE_KERNEL_LOCKED = PAGE_KERNEL; _PAGE_IE = _PAGE_IE_4V; _PAGE_E = _PAGE_E_4V; - _PAGE_CACHE = _PAGE_CACHE_4V; + _PAGE_CACHE = page_cache4v_flag; #ifdef CONFIG_DEBUG_PAGEALLOC kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; @@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void) kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ PAGE_OFFSET; #endif - kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V | + _PAGE_W_4V); for (i = 1; i < 4; i++) kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; @@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void) _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); - page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; - page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); - page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); - page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); page_exec_bit = _PAGE_EXEC_4V; @@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr) _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); if (tlb_type == hypervisor) val = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | _PAGE_EXEC_4V | _PAGE_W_4V); return val | paddr; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4eb0b0ffae85..e68408facf7a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -341,7 +341,7 @@ config X86_FEATURE_NAMES config X86_X2APIC bool "Support x2apic" - depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP + depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) ---help--- This enables x2apic support on CPUs that have this feature. @@ -441,6 +441,7 @@ config X86_UV depends on X86_EXTENDED_PLATFORM depends on NUMA depends on X86_X2APIC + depends on PCI ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. @@ -466,7 +467,6 @@ config X86_INTEL_CE select X86_REBOOTFIXUPS select OF select OF_EARLY_FLATTREE - select IRQ_DOMAIN ---help--- Select for the Intel CE media processor (CE4100) SOC. This option compiles in support for the CE4100 SOC for settop @@ -851,11 +851,12 @@ config NR_CPUS default "1" if !SMP default "8192" if MAXSMP default "32" if SMP && X86_BIGSMP - default "8" if SMP + default "8" if SMP && X86_32 + default "64" if SMP ---help--- This allows you to specify the maximum number of CPUs which this kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum - supported value is 4096, otherwise the maximum value is 512. The + supported value is 8192, otherwise the maximum value is 512. The minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds @@ -914,12 +915,12 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI - select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + select IRQ_DOMAIN_HIERARCHY + select PCI_MSI_IRQ_DOMAIN if PCI_MSI config X86_IO_APIC def_bool y depends on X86_LOCAL_APIC || X86_UP_IOAPIC - select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS bool "Reroute for broken boot IRQs" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 72484a645f05..a5973f851750 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -332,4 +332,15 @@ config X86_DEBUG_STATIC_CPU_HAS If unsure, say N. +config PUNIT_ATOM_DEBUG + tristate "ATOM Punit debug driver" + select DEBUG_FS + select IOSF_MBI + ---help--- + This is a debug driver, which gets the power states + of all Punit North Complex devices. The power states of + each device is exposed as part of the debugfs interface. + The current power state can be read from + /sys/kernel/debug/punit_atom/dev_power_state + endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2fda005bb334..57996ee840dd 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -77,6 +77,12 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 + # Align jump targets to 1 byte, not the default 16 bytes: + KBUILD_CFLAGS += -falign-jumps=1 + + # Pack loops tightly as well: + KBUILD_CFLAGS += -falign-loops=1 + # Don't autogenerate traditional x87 instructions KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) @@ -84,6 +90,9 @@ else # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) + # Use -mskip-rax-setup if supported. + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 72bf2680f819..63450a596800 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32) swapgs sysretl ENDPROC(native_usergs_sysret32) - -ENTRY(native_irq_enable_sysexit) - swapgs - sti - sysexit -ENDPROC(native_irq_enable_sysexit) #endif /* @@ -119,7 +113,7 @@ ENTRY(ia32_sysenter_target) * it is too small to ever cause noticeable irq latency. */ SWAPGS_UNSAFE_STACK - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ @@ -142,7 +136,7 @@ ENTRY(ia32_sysenter_target) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -169,8 +163,6 @@ sysenter_flags_fixed: testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys sysenter_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -179,8 +171,11 @@ sysenter_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ sysenter_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -247,9 +242,7 @@ sysexit_from_sys_call: movl %ebx,%esi /* 2nd arg: 1st syscall arg */ movl %eax,%edi /* 1st arg: syscall number */ call __audit_syscall_entry - movl RAX(%rsp),%eax /* reload syscall number */ - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + movl ORIG_RAX(%rsp),%eax /* reload syscall number */ movl %ebx,%edi /* reload 1st syscall arg */ movl RCX(%rsp),%esi /* reload 2nd syscall arg */ movl RDX(%rsp),%edx /* reload 3rd syscall arg */ @@ -300,13 +293,10 @@ sysenter_tracesys: #endif SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -356,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ @@ -376,7 +366,7 @@ ENTRY(ia32_cstar_target) pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rbp /* pt_regs->cx */ movl %ebp,%ecx - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -392,8 +382,6 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpq $IA32_NR_syscalls-1,%rax - ja ia32_badsys cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -402,8 +390,11 @@ cstar_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ cstar_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -457,14 +448,11 @@ cstar_tracesys: xchgl %r9d,%ebp SAVE_EXTRA_REGS CLEAR_RREGS r9 - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS xchgl %ebp,%r9d - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -523,7 +511,7 @@ ENTRY(ia32_syscall) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -531,8 +519,6 @@ ENTRY(ia32_syscall) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz ia32_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys ia32_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -540,9 +526,12 @@ ia32_do_call: xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX(%rsp) +1: ia32_ret_from_sys_call: CLEAR_RREGS jmp int_ret_from_sys_call @@ -550,23 +539,14 @@ ia32_ret_from_sys_call: ia32_tracesys: SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call + CFI_ENDPROC END(ia32_syscall) -ia32_badsys: - movq $0,ORIG_RAX(%rsp) - movq $-ENOSYS,%rax - jmp ia32_sysret - - CFI_ENDPROC - .macro PTREGSCALL label, func ALIGN GLOBAL(\label) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index bdf02eeee765..e7636bac7372 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -18,6 +18,12 @@ .endm #endif +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ .macro altinstruction_entry orig alt feature orig_len alt_len pad_len .long \orig - . .long \alt - . @@ -27,6 +33,12 @@ .byte \pad_len .endm +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ .macro ALTERNATIVE oldinstr, newinstr, feature 140: \oldinstr @@ -55,6 +67,12 @@ */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 140: \oldinstr diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 976b86a325e5..c8393634ca0c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -644,6 +644,12 @@ static inline void entering_ack_irq(void) entering_irq(); } +static inline void ipi_entering_ack_irq(void) +{ + ack_APIC_irq(); + irq_enter(); +} + static inline void exiting_irq(void) { irq_exit(); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7730c1c5c83a..189679aba703 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -63,6 +63,31 @@ _ASM_ALIGN ; \ _ASM_PTR (entry); \ .popsection + +.macro ALIGN_DESTINATION + /* check for bad alignment of destination */ + movl %edi,%ecx + andl $7,%ecx + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + _ASM_EXTABLE(100b,103b) + _ASM_EXTABLE(101b,103b) + .endm + #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e5cd123fdfb..e9168955c42f 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -22,7 +22,7 @@ * * Atomically reads the value of @v. */ -static inline int atomic_read(const atomic_t *v) +static __always_inline int atomic_read(const atomic_t *v) { return ACCESS_ONCE((v)->counter); } @@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v) * * Atomically sets the value of @v to @i. */ -static inline void atomic_set(atomic_t *v, int i) +static __always_inline void atomic_set(atomic_t *v, int i) { v->counter = i; } @@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i) * * Atomically adds @i to @v. */ -static inline void atomic_add(int i, atomic_t *v) +static __always_inline void atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v) * * Atomically subtracts @i from @v. */ -static inline void atomic_sub(int i, atomic_t *v) +static __always_inline void atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline int atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -static inline void atomic_inc(atomic_t *v) +static __always_inline void atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); @@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic_dec(atomic_t *v) +static __always_inline void atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); @@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic_dec_and_test(atomic_t *v) +static __always_inline int atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -126,7 +126,7 @@ static inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic_inc_and_test(atomic_t *v) +static __always_inline int atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline int atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } @@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } @@ -164,7 +164,7 @@ static inline int atomic_add_return(int i, atomic_t *v) * * Atomically subtracts @i from @v and returns @v - @i */ -static inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int atomic_sub_return(int i, atomic_t *v) { return atomic_add_return(-i, v); } @@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); } @@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); @@ -213,7 +213,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) * Atomically adds 1 to @v * Returns the new value of @u */ -static inline short int atomic_inc_short(short int *v) +static __always_inline short int atomic_inc_short(short int *v) { asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); return *v; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index f8d273e18516..b965f9e03f2a 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static __always_inline void atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic64_dec(atomic64_t *v) +static __always_inline void atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index dc5fa661465f..27ca0afcccd7 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_HAVE_KVM BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR, smp_kvm_posted_intr_ipi) +BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR, + smp_kvm_posted_intr_wakeup_ipi) #endif /* diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f5fb6b6567e..986606539395 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,6 +14,7 @@ typedef struct { #endif #ifdef CONFIG_HAVE_KVM unsigned int kvm_posted_intr_ipis; + unsigned int kvm_posted_intr_wakeup_ipis; #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 36f7125945e3..5fa9fb0f8809 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -74,20 +74,16 @@ extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); struct irq_data; +struct hpet_dev; +struct irq_domain; + extern void hpet_msi_unmask(struct irq_data *data); extern void hpet_msi_mask(struct irq_data *data); -struct hpet_dev; extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); - -#ifdef CONFIG_PCI_MSI -extern int default_setup_hpet_msi(unsigned int irq, unsigned int id); -#else -static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id) -{ - return -EINVAL; -} -#endif +extern struct irq_domain *hpet_create_irq_domain(int hpet_id); +extern int hpet_assign_irq(struct irq_domain *domain, + struct hpet_dev *dev, int dev_num); #ifdef CONFIG_HPET_EMULATE_RTC diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..10c80d4f8386 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,6 +29,7 @@ extern asmlinkage void apic_timer_interrupt(void); extern asmlinkage void x86_platform_ipi(void); extern asmlinkage void kvm_posted_intr_ipi(void); +extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); @@ -36,40 +37,6 @@ extern asmlinkage void spurious_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); -extern asmlinkage void invalidate_interrupt(void); -extern asmlinkage void invalidate_interrupt0(void); -extern asmlinkage void invalidate_interrupt1(void); -extern asmlinkage void invalidate_interrupt2(void); -extern asmlinkage void invalidate_interrupt3(void); -extern asmlinkage void invalidate_interrupt4(void); -extern asmlinkage void invalidate_interrupt5(void); -extern asmlinkage void invalidate_interrupt6(void); -extern asmlinkage void invalidate_interrupt7(void); -extern asmlinkage void invalidate_interrupt8(void); -extern asmlinkage void invalidate_interrupt9(void); -extern asmlinkage void invalidate_interrupt10(void); -extern asmlinkage void invalidate_interrupt11(void); -extern asmlinkage void invalidate_interrupt12(void); -extern asmlinkage void invalidate_interrupt13(void); -extern asmlinkage void invalidate_interrupt14(void); -extern asmlinkage void invalidate_interrupt15(void); -extern asmlinkage void invalidate_interrupt16(void); -extern asmlinkage void invalidate_interrupt17(void); -extern asmlinkage void invalidate_interrupt18(void); -extern asmlinkage void invalidate_interrupt19(void); -extern asmlinkage void invalidate_interrupt20(void); -extern asmlinkage void invalidate_interrupt21(void); -extern asmlinkage void invalidate_interrupt22(void); -extern asmlinkage void invalidate_interrupt23(void); -extern asmlinkage void invalidate_interrupt24(void); -extern asmlinkage void invalidate_interrupt25(void); -extern asmlinkage void invalidate_interrupt26(void); -extern asmlinkage void invalidate_interrupt27(void); -extern asmlinkage void invalidate_interrupt28(void); -extern asmlinkage void invalidate_interrupt29(void); -extern asmlinkage void invalidate_interrupt30(void); -extern asmlinkage void invalidate_interrupt31(void); - extern asmlinkage void irq_move_cleanup_interrupt(void); extern asmlinkage void reboot_interrupt(void); extern asmlinkage void threshold_interrupt(void); @@ -92,55 +59,87 @@ extern void trace_call_function_single_interrupt(void); #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt #define trace_reboot_interrupt reboot_interrupt #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi +#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi #endif /* CONFIG_TRACING */ -#ifdef CONFIG_IRQ_REMAP -/* Intel specific interrupt remapping information */ -struct irq_2_iommu { - struct intel_iommu *iommu; - u16 irte_index; - u16 sub_handle; - u8 irte_mask; -}; - -/* AMD specific interrupt remapping information */ -struct irq_2_irte { - u16 devid; /* Device ID for IRTE table */ - u16 index; /* Index into IRTE table*/ -}; -#endif /* CONFIG_IRQ_REMAP */ - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; +struct pci_dev; +struct msi_desc; + +enum irq_alloc_type { + X86_IRQ_ALLOC_TYPE_IOAPIC = 1, + X86_IRQ_ALLOC_TYPE_HPET, + X86_IRQ_ALLOC_TYPE_MSI, + X86_IRQ_ALLOC_TYPE_MSIX, + X86_IRQ_ALLOC_TYPE_DMAR, + X86_IRQ_ALLOC_TYPE_UV, +}; -struct irq_cfg { - cpumask_var_t domain; - cpumask_var_t old_domain; - u8 vector; - u8 move_in_progress : 1; -#ifdef CONFIG_IRQ_REMAP - u8 remapped : 1; +struct irq_alloc_info { + enum irq_alloc_type type; + u32 flags; + const struct cpumask *mask; /* CPU mask for vector allocation */ union { - struct irq_2_iommu irq_2_iommu; - struct irq_2_irte irq_2_irte; - }; + int unused; +#ifdef CONFIG_HPET_TIMER + struct { + int hpet_id; + int hpet_index; + void *hpet_data; + }; #endif - union { -#ifdef CONFIG_X86_IO_APIC +#ifdef CONFIG_PCI_MSI struct { - struct list_head irq_2_pin; + struct pci_dev *msi_dev; + irq_hw_number_t msi_hwirq; + }; +#endif +#ifdef CONFIG_X86_IO_APIC + struct { + int ioapic_id; + int ioapic_pin; + int ioapic_node; + u32 ioapic_trigger : 1; + u32 ioapic_polarity : 1; + u32 ioapic_valid : 1; + struct IO_APIC_route_entry *ioapic_entry; + }; +#endif +#ifdef CONFIG_DMAR_TABLE + struct { + int dmar_id; + void *dmar_data; + }; +#endif +#ifdef CONFIG_HT_IRQ + struct { + int ht_pos; + int ht_idx; + struct pci_dev *ht_dev; + void *ht_update; + }; +#endif +#ifdef CONFIG_X86_UV + struct { + int uv_limit; + int uv_blade; + unsigned long uv_offset; + char *uv_name; }; #endif }; }; +struct irq_cfg { + unsigned int dest_apicid; + u8 vector; +}; + extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); -extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); -extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); -extern void clear_irq_vector(int irq, struct irq_cfg *cfg); extern void setup_vector_irq(int cpu); #ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); @@ -150,10 +149,7 @@ static inline void send_cleanup_vector(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif -extern int apic_retrigger_irq(struct irq_data *data); extern void apic_ack_edge(struct irq_data *data); -extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id); #else /* CONFIG_X86_LOCAL_APIC */ static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} @@ -163,8 +159,7 @@ static inline void unlock_vector_lock(void) {} extern atomic_t irq_err_count; extern atomic_t irq_mis_count; -/* EISA */ -extern void eisa_set_level_irq(unsigned int irq); +extern void elcr_set_level_irq(unsigned int irq); /* SMP */ extern __visible void smp_apic_timer_interrupt(struct pt_regs *); @@ -178,7 +173,6 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); extern __visible void smp_reschedule_interrupt(struct pt_regs *); extern __visible void smp_call_function_interrupt(struct pt_regs *); extern __visible void smp_call_function_single_interrupt(struct pt_regs *); -extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif extern char irq_entries_start[]; diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 2f91685fe1cd..6cbf2cfb3f8a 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -95,9 +95,22 @@ struct IR_IO_APIC_route_entry { index : 15; } __attribute__ ((packed)); -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 +struct irq_alloc_info; +struct ioapic_domain_cfg; + +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#define IOAPIC_MASKED 1 +#define IOAPIC_UNMASKED 0 + +#define IOAPIC_POL_HIGH 0 +#define IOAPIC_POL_LOW 1 + +#define IOAPIC_DEST_MODE_PHYSICAL 0 +#define IOAPIC_DEST_MODE_LOGICAL 1 + #define IOAPIC_MAP_ALLOC 0x1 #define IOAPIC_MAP_CHECK 0x2 @@ -110,9 +123,6 @@ extern int nr_ioapics; extern int mpc_ioapic_id(int ioapic); extern unsigned int mpc_ioapic_addr(int ioapic); -extern struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic); - -#define MP_MAX_IOAPIC_PIN 127 /* # of MP IRQ source entries */ extern int mp_irq_entries; @@ -120,9 +130,6 @@ extern int mp_irq_entries; /* MP IRQ source entries */ extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; -/* Older SiS APIC requires we rewrite the index register */ -extern int sis_apic_bug; - /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; @@ -132,6 +139,8 @@ extern int noioapicquirk; /* -1 if "noapic" boot option passed */ extern int noioapicreroute; +extern u32 gsi_top; + extern unsigned long io_apic_irqs; #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs)) @@ -147,13 +156,6 @@ struct irq_cfg; extern void ioapic_insert_resources(void); extern int arch_early_ioapic_init(void); -extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, - unsigned int, int, - struct io_apic_irq_attr *); -extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); - -extern void native_eoi_ioapic_pin(int apic, int pin, int vector); - extern int save_ioapic_entries(void); extern void mask_ioapic_entries(void); extern int restore_ioapic_entries(void); @@ -161,82 +163,32 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); -struct io_apic_irq_attr { - int ioapic; - int ioapic_pin; - int trigger; - int polarity; -}; - -enum ioapic_domain_type { - IOAPIC_DOMAIN_INVALID, - IOAPIC_DOMAIN_LEGACY, - IOAPIC_DOMAIN_STRICT, - IOAPIC_DOMAIN_DYNAMIC, -}; - -struct device_node; -struct irq_domain; -struct irq_domain_ops; - -struct ioapic_domain_cfg { - enum ioapic_domain_type type; - const struct irq_domain_ops *ops; - struct device_node *dev; -}; - -struct mp_ioapic_gsi{ - u32 gsi_base; - u32 gsi_end; -}; -extern u32 gsi_top; - extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); -extern u32 mp_pin_to_gsi(int ioapic, int pin); -extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); +extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info); extern void mp_unmap_irq(int irq); extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg); extern int mp_unregister_ioapic(u32 gsi_base); extern int mp_ioapic_registered(u32 gsi_base); -extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq); -extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); -extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); -extern void __init pre_init_apic_IRQ0(void); + +extern void ioapic_set_alloc_attr(struct irq_alloc_info *info, + int node, int trigger, int polarity); extern void mp_save_irq(struct mpc_intsrc *m); extern void disable_ioapic_support(void); -extern void __init native_io_apic_init_mappings(void); +extern void __init io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); -extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); -extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); extern void native_disable_io_apic(void); -extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); -extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); -extern int native_ioapic_set_affinity(struct irq_data *, - const struct cpumask *, - bool); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { return x86_io_apic_ops.read(apic, reg); } -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - x86_io_apic_ops.write(apic, reg, value); -} -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - x86_io_apic_ops.modify(apic, reg, value); -} - -extern void io_apic_eoi(unsigned int apic, unsigned int vector); - extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); @@ -253,8 +205,12 @@ static inline int arch_early_ioapic_init(void) { return 0; } static inline void print_IO_APICs(void) {} #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } -static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } -static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } +static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info) +{ + return gsi; +} + static inline void mp_unmap_irq(int irq) { } static inline int save_ioapic_entries(void) @@ -268,17 +224,11 @@ static inline int restore_ioapic_entries(void) return -ENOMEM; } -static inline void mp_save_irq(struct mpc_intsrc *m) { }; +static inline void mp_save_irq(struct mpc_intsrc *m) { } static inline void disable_ioapic_support(void) { } -#define native_io_apic_init_mappings NULL +static inline void io_apic_init_mappings(void) { } #define native_io_apic_read NULL -#define native_io_apic_write NULL -#define native_io_apic_modify NULL #define native_disable_io_apic NULL -#define native_io_apic_print_entries NULL -#define native_ioapic_set_affinity NULL -#define native_setup_ioapic_entry NULL -#define native_eoi_ioapic_pin NULL static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index a80cbb88ea91..8008d06581c7 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -30,6 +30,10 @@ extern void fixup_irqs(void); extern void irq_force_complete_move(int); #endif +#ifdef CONFIG_HAVE_KVM +extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); +#endif + extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 6224d316c405..78974fbc33b4 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -22,14 +22,12 @@ #ifndef __X86_IRQ_REMAPPING_H #define __X86_IRQ_REMAPPING_H +#include <asm/irqdomain.h> +#include <asm/hw_irq.h> #include <asm/io_apic.h> -struct IO_APIC_route_entry; -struct io_apic_irq_attr; -struct irq_chip; struct msi_msg; -struct pci_dev; -struct irq_cfg; +struct irq_alloc_info; #ifdef CONFIG_IRQ_REMAP @@ -39,22 +37,21 @@ extern int irq_remapping_enable(void); extern void irq_remapping_disable(void); extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); -extern int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, - int vector, - struct io_apic_irq_attr *attr); -extern void free_remapped_irq(int irq); -extern void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id); -extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); extern void panic_if_irq_remap(const char *msg); -extern bool setup_remapped_irq(int irq, - struct irq_cfg *cfg, - struct irq_chip *chip); -void irq_remap_modify_chip_defaults(struct irq_chip *chip); +extern struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info); +extern struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info); + +/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ +extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); + +/* Get parent irqdomain for interrupt remapping irqdomain */ +static inline struct irq_domain *arch_get_ir_parent_domain(void) +{ + return x86_vector_domain; +} #else /* CONFIG_IRQ_REMAP */ @@ -64,42 +61,22 @@ static inline int irq_remapping_enable(void) { return -ENODEV; } static inline void irq_remapping_disable(void) { } static inline int irq_remapping_reenable(int eim) { return -ENODEV; } static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; } -static inline int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, - int vector, - struct io_apic_irq_attr *attr) -{ - return -ENODEV; -} -static inline void free_remapped_irq(int irq) { } -static inline void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ -} -static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) -{ - return -ENODEV; -} static inline void panic_if_irq_remap(const char *msg) { } -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +static inline struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) { + return NULL; } -static inline bool setup_remapped_irq(int irq, - struct irq_cfg *cfg, - struct irq_chip *chip) +static inline struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info) { - return false; + return NULL; } -#endif /* CONFIG_IRQ_REMAP */ - -#define dmar_alloc_hwirq() irq_alloc_hwirq(-1) -#define dmar_free_hwirq irq_free_hwirq +#endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..0ed29ac13a9d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -47,31 +47,12 @@ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR #define IA32_SYSCALL_VECTOR 0x80 -#ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 -#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. * round up to the next 16-vector boundary */ -#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) - -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff @@ -105,6 +86,7 @@ /* Vector for KVM to deliver posted interrupt IPI */ #ifdef CONFIG_HAVE_KVM #define POSTED_INTR_VECTOR 0xf2 +#define POSTED_INTR_WAKEUP_VECTOR 0xf1 #endif /* @@ -155,18 +137,22 @@ static inline int invalid_vm86_irq(int irq) * static arrays. */ -#define NR_IRQS_LEGACY 16 +#define NR_IRQS_LEGACY 16 -#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) +#define CPU_VECTOR_LIMIT (64 * NR_CPUS) +#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS) -#ifdef CONFIG_X86_IO_APIC -# define CPU_VECTOR_LIMIT (64 * NR_CPUS) -# define NR_IRQS \ +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI) +#define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -#else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS NR_IRQS_LEGACY +#elif defined(CONFIG_X86_IO_APIC) +#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +#elif defined(CONFIG_PCI_MSI) +#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT) +#else +#define NR_IRQS NR_IRQS_LEGACY #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h new file mode 100644 index 000000000000..d26075b52885 --- /dev/null +++ b/arch/x86/include/asm/irqdomain.h @@ -0,0 +1,63 @@ +#ifndef _ASM_IRQDOMAIN_H +#define _ASM_IRQDOMAIN_H + +#include <linux/irqdomain.h> +#include <asm/hw_irq.h> + +#ifdef CONFIG_X86_LOCAL_APIC +enum { + /* Allocate contiguous CPU vectors */ + X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, +}; + +extern struct irq_domain *x86_vector_domain; + +extern void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask); +extern void copy_irq_alloc_info(struct irq_alloc_info *dst, + struct irq_alloc_info *src); +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC +struct device_node; +struct irq_data; + +enum ioapic_domain_type { + IOAPIC_DOMAIN_INVALID, + IOAPIC_DOMAIN_LEGACY, + IOAPIC_DOMAIN_STRICT, + IOAPIC_DOMAIN_DYNAMIC, +}; + +struct ioapic_domain_cfg { + enum ioapic_domain_type type; + const struct irq_domain_ops *ops; + struct device_node *dev; +}; + +extern const struct irq_domain_ops mp_ioapic_irqdomain_ops; + +extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg); +extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +extern void mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data); +extern void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data); +extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); +#endif /* CONFIG_X86_IO_APIC */ + +#ifdef CONFIG_PCI_MSI +extern void arch_init_msi_domain(struct irq_domain *domain); +#else +static inline void arch_init_msi_domain(struct irq_domain *domain) { } +#endif + +#ifdef CONFIG_HT_IRQ +extern void arch_init_htirq_domain(struct irq_domain *domain); +#else +static inline void arch_init_htirq_domain(struct irq_domain *domain) { } +#endif + +#endif diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h new file mode 100644 index 000000000000..93724cc62177 --- /dev/null +++ b/arch/x86/include/asm/msi.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_MSI_H +#define _ASM_X86_MSI_H +#include <asm/hw_irq.h> + +typedef struct irq_alloc_info msi_alloc_info_t; + +#endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f7b0b5c112f2..344c646e7f06 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -160,13 +160,14 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); +#ifdef CONFIG_X86_32 /* * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) + * is only used in 32-bit kernels. 64-bit kernels use + * usergs_sysret32 instead. */ void (*irq_enable_sysexit)(void); +#endif /* * Switch to usermode gs and return to 64-bit usermode using diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 4e370a5d8117..d8c80ff32e8c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -96,15 +96,10 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_PCI_MSI /* implemented in arch/x86/kernel/apic/io_apic. */ struct msi_desc; -void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, - unsigned int dest, struct msi_msg *msg, u8 hpet_id); int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev); -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset); #else -#define native_compose_msi_msg NULL #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b4bdec3e9523..225ee545e1a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -177,8 +177,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -DECLARE_PER_CPU(unsigned long, kernel_stack); - static inline struct thread_info *current_thread_info(void) { return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); @@ -197,9 +195,13 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_64 +# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +#endif + /* Load thread_info address into "reg" */ #define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; /* diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..0ed5504c6060 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); return ret; + case 8: + __put_user_size(*(u64 *)from, (u64 __user *)to, + 8, ret, 8); + return ret; } } return __copy_to_user_ll(to, from, n); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f58a9c7a3c86..48d34d28f5a6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -171,38 +171,17 @@ struct x86_platform_ops { }; struct pci_dev; -struct msi_msg; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); - void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq, - unsigned int dest, struct msi_msg *msg, - u8 hpet_id); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev); - int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; -struct IO_APIC_route_entry; -struct io_apic_irq_attr; -struct irq_data; -struct cpumask; - struct x86_io_apic_ops { - void (*init) (void); unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*write) (unsigned int apic, unsigned int reg, unsigned int value); - void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); - void (*print_entries)(unsigned int apic, unsigned int nr_entries); - int (*set_affinity)(struct irq_data *data, - const struct cpumask *mask, - bool force); - int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr); - void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c469490db4a8..3c6bb342a48f 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -140,6 +140,7 @@ #define MSR_CORE_C3_RESIDENCY 0x000003fc #define MSR_CORE_C6_RESIDENCY 0x000003fd #define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff #define MSR_PKG_C2_RESIDENCY 0x0000060d #define MSR_PKG_C8_RESIDENCY 0x00000630 #define MSR_PKG_C9_RESIDENCY 0x00000631 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dbe76a14c3c9..e49ee24da85e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -31,12 +31,12 @@ #include <linux/module.h> #include <linux/dmi.h> #include <linux/irq.h> -#include <linux/irqdomain.h> #include <linux/slab.h> #include <linux/bootmem.h> #include <linux/ioport.h> #include <linux/pci.h> +#include <asm/irqdomain.h> #include <asm/pci_x86.h> #include <asm/pgtable.h> #include <asm/io_apic.h> @@ -400,57 +400,13 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } -static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, - int polarity) -{ - int irq, node; - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; - - trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; - polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; - node = dev ? dev_to_node(dev) : NUMA_NO_NODE; - if (mp_set_gsi_attr(gsi, trigger, polarity, node)) { - pr_warn("Failed to set pin attr for GSI%d\n", gsi); - return -1; - } - - irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); - if (irq < 0) - return irq; - - /* Don't set up the ACPI SCI because it's already set up */ - if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi) - mp_config_acpi_gsi(dev, gsi, trigger, polarity); - - return irq; -} - -static void mp_unregister_gsi(u32 gsi) -{ - int irq; - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return; - - irq = mp_map_gsi_to_irq(gsi, 0); - if (irq > 0) - mp_unmap_irq(irq); -} - -static struct irq_domain_ops acpi_irqdomain_ops = { - .map = mp_irqdomain_map, - .unmap = mp_irqdomain_unmap, -}; - static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_DYNAMIC, - .ops = &acpi_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; ioapic = (struct acpi_madt_io_apic *)header; @@ -652,7 +608,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi, * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (trigger == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); + elcr_set_level_irq(gsi); #endif return gsi; @@ -663,10 +619,21 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int trigger, int polarity) { int irq = gsi; - #ifdef CONFIG_X86_IO_APIC + int node; + struct irq_alloc_info info; + + node = dev ? dev_to_node(dev) : NUMA_NO_NODE; + trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; + polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; + ioapic_set_alloc_attr(&info, node, trigger, polarity); + mutex_lock(&acpi_ioapic_lock); - irq = mp_register_gsi(dev, gsi, trigger, polarity); + irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info); + /* Don't set up the ACPI SCI because it's already set up */ + if (irq >= 0 && enable_update_mptable && + acpi_gbl_FADT.sci_interrupt != gsi) + mp_config_acpi_gsi(dev, gsi, trigger, polarity); mutex_unlock(&acpi_ioapic_lock); #endif @@ -676,8 +643,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, static void acpi_unregister_gsi_ioapic(u32 gsi) { #ifdef CONFIG_X86_IO_APIC + int irq; + mutex_lock(&acpi_ioapic_lock); - mp_unregister_gsi(gsi); + irq = mp_map_gsi_to_irq(gsi, 0, NULL); + if (irq > 0) + mp_unmap_irq(irq); mutex_unlock(&acpi_ioapic_lock); #endif } @@ -786,7 +757,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) u64 addr; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_DYNAMIC, - .ops = &acpi_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr); diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index ae693b51ed8e..8c35df468104 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -62,7 +62,7 @@ ENTRY(do_suspend_lowlevel) pushfq popq pt_regs_flags(%rax) - movq $resume_point, saved_rip(%rip) + movq $.Lresume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -75,10 +75,10 @@ ENTRY(do_suspend_lowlevel) xorl %eax, %eax call x86_acpi_enter_sleep_state /* in case something went wrong, restore the machine status and go on */ - jmp resume_point + jmp .Lresume_point .align 4 -resume_point: +.Lresume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..b0932c4341b3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -227,6 +227,15 @@ void __init arch_init_ideal_nops(void) #endif } break; + + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 > 0xf) { + ideal_nops = p6_nops; + return; + } + + /* fall through */ + default: #ifdef CONFIG_X86_64 ideal_nops = k8_nops; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 6a7c23ff21d3..ede92c3364d3 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -171,10 +171,6 @@ static int __init apbt_clockevent_register(void) static void apbt_setup_irq(struct apbt_dev *adev) { - /* timer0 irq has been setup early */ - if (adev->irq == 0) - return; - irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); } diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index 816f36e979ad..ae50d3454d78 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu <jiang.liu@linux.intel.com> + * Add support of hierarchical irqdomain * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,78 +16,112 @@ #include <linux/device.h> #include <linux/pci.h> #include <linux/htirq.h> +#include <asm/irqdomain.h> #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/hypertransport.h> +static struct irq_domain *htirq_domain; + /* * Hypertransport interrupt support */ -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - static int ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest; + struct irq_data *parent = data->parent_data; int ret; - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - target_ht_irq(data->irq, dest, cfg->vector); - return IRQ_SET_MASK_OK_NOCOPY; + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + struct ht_irq_msg msg; + struct irq_cfg *cfg = irqd_cfg(data); + + fetch_ht_irq_msg(data->irq, &msg); + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | + HT_IRQ_LOW_DEST_ID_MASK); + msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) | + HT_IRQ_LOW_DEST_ID(cfg->dest_apicid); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); + write_ht_irq_msg(data->irq, &msg); + } + + return ret; } static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .irq_mask = mask_ht_irq, .irq_unmask = unmask_ht_irq, - .irq_ack = apic_ack_edge, + .irq_ack = irq_chip_ack_parent, .irq_set_affinity = ht_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct irq_cfg *cfg; - struct ht_irq_msg msg; - unsigned dest; - int err; + struct ht_irq_cfg *ht_cfg; + struct irq_alloc_info *info = arg; + struct pci_dev *dev; + irq_hw_number_t hwirq; + int ret; - if (disable_apic) - return -ENXIO; + if (nr_irqs > 1 || !info) + return -EINVAL; - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; + dev = info->ht_dev; + hwirq = (info->ht_idx & 0xFF) | + PCI_DEVID(dev->bus->number, dev->devfn) << 8 | + (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24; + if (irq_find_mapping(domain, hwirq) > 0) + return -EEXIST; - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; + ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL); + if (!ht_cfg) + return -ENOMEM; - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); + if (ret < 0) { + kfree(ht_cfg); + return ret; + } + + /* Initialize msg to a value that will never match the first write. */ + ht_cfg->msg.address_lo = 0xffffffff; + ht_cfg->msg.address_hi = 0xffffffff; + ht_cfg->dev = info->ht_dev; + ht_cfg->update = info->ht_update; + ht_cfg->pos = info->ht_pos; + ht_cfg->idx = 0x10 + (info->ht_idx * 2); + irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg, + handle_edge_irq, ht_cfg, "edge"); + + return 0; +} + +static void htirq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + BUG_ON(nr_irqs != 1); + kfree(irq_data->chip_data); + irq_domain_free_irqs_top(domain, virq, nr_irqs); +} +static void htirq_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct ht_irq_msg msg; + struct irq_cfg *cfg = irqd_cfg(irq_data); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); msg.address_lo = HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) | HT_IRQ_LOW_VECTOR(cfg->vector) | ((apic->irq_dest_mode == 0) ? HT_IRQ_LOW_DM_PHYSICAL : @@ -95,13 +131,56 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; + write_ht_irq_msg(irq_data->irq, &msg); +} - write_ht_irq_msg(irq, &msg); +static void htirq_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct ht_irq_msg msg; - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); + memset(&msg, 0, sizeof(msg)); + write_ht_irq_msg(irq_data->irq, &msg); +} - dev_dbg(&dev->dev, "irq %d for HT\n", irq); +static const struct irq_domain_ops htirq_domain_ops = { + .alloc = htirq_domain_alloc, + .free = htirq_domain_free, + .activate = htirq_domain_activate, + .deactivate = htirq_domain_deactivate, +}; - return 0; +void arch_init_htirq_domain(struct irq_domain *parent) +{ + if (disable_apic) + return; + + htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL); + if (!htirq_domain) + pr_warn("failed to initialize irqdomain for HTIRQ.\n"); + else + htirq_domain->parent = parent; +} + +int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, + ht_irq_update_t *update) +{ + struct irq_alloc_info info; + + if (!htirq_domain) + return -ENOSYS; + + init_irq_alloc_info(&info, NULL); + info.ht_idx = idx; + info.ht_pos = pos; + info.ht_dev = dev; + info.ht_update = update; + + return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev), + &info); +} + +void arch_teardown_ht_irq(unsigned int irq) +{ + irq_domain_free_irqs(irq, 1); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..845dc0df2002 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -18,6 +18,16 @@ * and Rolf G. Tews * for testing these extensively * Paul Diefenbaugh : Added full ACPI support + * + * Historical information which is worth to be preserved: + * + * - SiS APIC rmw bug: + * + * We used to have a workaround for a bug in SiS chips which + * required to rewrite the index register for a read-modify-write + * operation as the chip lost the index information which was + * setup for the read already. We cache the data now, so that + * workaround has been removed. */ #include <linux/mm.h> @@ -31,13 +41,13 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/syscore_ops.h> -#include <linux/irqdomain.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/jiffies.h> /* time_after() */ #include <linux/slab.h> #include <linux/bootmem.h> +#include <asm/irqdomain.h> #include <asm/idle.h> #include <asm/io.h> #include <asm/smp.h> @@ -63,27 +73,31 @@ #define for_each_ioapic_pin(idx, pin) \ for_each_ioapic((idx)) \ for_each_pin((idx), (pin)) - #define for_each_irq_pin(entry, head) \ list_for_each_entry(entry, &head, list) -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; static int ioapic_initialized; -struct mp_pin_info { +struct irq_pin_list { + struct list_head list; + int apic, pin; +}; + +struct mp_chip_data { + struct list_head irq_2_pin; + struct IO_APIC_route_entry entry; int trigger; int polarity; - int node; - int set; u32 count; + bool isa_irq; +}; + +struct mp_ioapic_gsi { + u32 gsi_base; + u32 gsi_end; }; static struct ioapic { @@ -101,7 +115,6 @@ static struct ioapic { struct mp_ioapic_gsi gsi_config; struct ioapic_domain_cfg irqdomain_cfg; struct irq_domain *irqdomain; - struct mp_pin_info *pin_info; struct resource *iomem_res; } ioapics[MAX_IO_APICS]; @@ -117,7 +130,7 @@ unsigned int mpc_ioapic_addr(int ioapic_idx) return ioapics[ioapic_idx].mp_config.apicaddr; } -struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) +static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) { return &ioapics[ioapic_idx].gsi_config; } @@ -129,11 +142,16 @@ static inline int mp_ioapic_pin_count(int ioapic) return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; } -u32 mp_pin_to_gsi(int ioapic, int pin) +static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; } +static inline bool mp_is_legacy_irq(int irq) +{ + return irq >= 0 && irq < nr_legacy_irqs(); +} + /* * Initialize all legacy IRQs and all pins on the first IOAPIC * if we have legacy interrupt controller. Kernel boot option "pirq=" @@ -144,12 +162,7 @@ static inline int mp_init_irq_at_boot(int ioapic, int irq) if (!nr_legacy_irqs()) return 0; - return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs()); -} - -static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin) -{ - return ioapics[ioapic_idx].pin_info + pin; + return ioapic == 0 || mp_is_legacy_irq(irq); } static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) @@ -216,16 +229,6 @@ void mp_save_irq(struct mpc_intsrc *m) panic("Max # of irq sources exceeded!!\n"); } -struct irq_pin_list { - struct list_head list; - int apic, pin; -}; - -static struct irq_pin_list *alloc_irq_pin_list(int node) -{ - return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); -} - static void alloc_ioapic_saved_registers(int idx) { size_t size; @@ -247,8 +250,7 @@ static void free_ioapic_saved_registers(int idx) int __init arch_early_ioapic_init(void) { - struct irq_cfg *cfg; - int i, node = cpu_to_node(0); + int i; if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; @@ -256,16 +258,6 @@ int __init arch_early_ioapic_init(void) for_each_ioapic(i) alloc_ioapic_saved_registers(i); - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. - */ - for (i = 0; i < nr_legacy_irqs(); i++) { - cfg = alloc_irq_and_cfg_at(i, node); - cfg->vector = IRQ0_VECTOR + i; - cpumask_setall(cfg->domain); - } - return 0; } @@ -283,7 +275,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mpc_ioapic_addr(idx) & ~PAGE_MASK); } -void io_apic_eoi(unsigned int apic, unsigned int vector) +static inline void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(vector, &io_apic->eoi); @@ -296,7 +288,8 @@ unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) return readl(&io_apic->data); } -void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +static void io_apic_write(unsigned int apic, unsigned int reg, + unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -304,21 +297,6 @@ void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int valu writel(value, &io_apic->data); } -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; @@ -378,7 +356,7 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) static void ioapic_mask_entry(int apic, int pin) { unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; + union entry_union eu = { .entry.mask = IOAPIC_MASKED }; raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); @@ -391,16 +369,17 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int __add_pin_to_irq_node(struct mp_chip_data *data, + int node, int apic, int pin) { struct irq_pin_list *entry; /* don't allow duplicates */ - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) if (entry->apic == apic && entry->pin == pin) return 0; - entry = alloc_irq_pin_list(node); + entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node); if (!entry) { pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", node, apic, pin); @@ -408,16 +387,16 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi } entry->apic = apic; entry->pin = pin; + list_add_tail(&entry->list, &data->irq_2_pin); - list_add_tail(&entry->list, &cfg->irq_2_pin); return 0; } -static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) +static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin) { struct irq_pin_list *tmp, *entry; - list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list) + list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) if (entry->apic == apic && entry->pin == pin) { list_del(&entry->list); kfree(entry); @@ -425,22 +404,23 @@ static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) } } -static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static void add_pin_to_irq_node(struct mp_chip_data *data, + int node, int apic, int pin) { - if (__add_pin_to_irq_node(cfg, node, apic, pin)) + if (__add_pin_to_irq_node(data, node, apic, pin)) panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); } /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, +static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, int oldapic, int oldpin, int newapic, int newpin) { struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, data->irq_2_pin) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; @@ -450,32 +430,26 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, } /* old apic/pin didn't exist, so just add new ones */ - add_pin_to_irq_node(cfg, node, newapic, newpin); -} - -static void __io_apic_modify_irq(struct irq_pin_list *entry, - int mask_and, int mask_or, - void (*final)(struct irq_pin_list *entry)) -{ - unsigned int reg, pin; - - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); + add_pin_to_irq_node(data, node, newapic, newpin); } -static void io_apic_modify_irq(struct irq_cfg *cfg, +static void io_apic_modify_irq(struct mp_chip_data *data, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { + union entry_union eu; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) - __io_apic_modify_irq(entry, mask_and, mask_or, final); + eu.entry = data->entry; + eu.w1 &= mask_and; + eu.w1 |= mask_or; + data->entry = eu.entry; + + for_each_irq_pin(entry, data->irq_2_pin) { + io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1); + if (final) + final(entry); + } } static void io_apic_sync(struct irq_pin_list *entry) @@ -490,39 +464,31 @@ static void io_apic_sync(struct irq_pin_list *entry) readl(&io_apic->data); } -static void mask_ioapic(struct irq_cfg *cfg) +static void mask_ioapic_irq(struct irq_data *irq_data) { + struct mp_chip_data *data = irq_data->chip_data; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } -static void mask_ioapic_irq(struct irq_data *data) +static void __unmask_ioapic(struct mp_chip_data *data) { - mask_ioapic(irqd_cfg(data)); + io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL); } -static void __unmask_ioapic(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); -} - -static void unmask_ioapic(struct irq_cfg *cfg) +static void unmask_ioapic_irq(struct irq_data *irq_data) { + struct mp_chip_data *data = irq_data->chip_data; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - __unmask_ioapic(cfg); + __unmask_ioapic(data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_ioapic_irq(struct irq_data *data) -{ - unmask_ioapic(irqd_cfg(data)); -} - /* * IO-APIC versions below 0x20 don't support EOI register. * For the record, here is the information about various versions: @@ -539,7 +505,7 @@ static void unmask_ioapic_irq(struct irq_data *data) * Otherwise, we simulate the EOI message manually by changing the trigger * mode to edge and then back to level, with RTE being masked during this. */ -void native_eoi_ioapic_pin(int apic, int pin, int vector) +static void __eoi_ioapic_pin(int apic, int pin, int vector) { if (mpc_ioapic_ver(apic) >= 0x20) { io_apic_eoi(apic, vector); @@ -551,7 +517,7 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector) /* * Mask the entry and change the trigger mode to edge. */ - entry1.mask = 1; + entry1.mask = IOAPIC_MASKED; entry1.trigger = IOAPIC_EDGE; __ioapic_write_entry(apic, pin, entry1); @@ -563,15 +529,14 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector) } } -void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +void eoi_ioapic_pin(int vector, struct mp_chip_data *data) { - struct irq_pin_list *entry; unsigned long flags; + struct irq_pin_list *entry; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) - x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, - cfg->vector); + for_each_irq_pin(entry, data->irq_2_pin) + __eoi_ioapic_pin(entry->apic, entry->pin, vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -588,8 +553,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) * Make sure the entry is masked and re-read the contents to check * if it is a level triggered pin and if the remote-IRR is set. */ - if (!entry.mask) { - entry.mask = 1; + if (entry.mask == IOAPIC_UNMASKED) { + entry.mask = IOAPIC_MASKED; ioapic_write_entry(apic, pin, entry); entry = ioapic_read_entry(apic, pin); } @@ -602,13 +567,12 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) * doesn't clear the remote-IRR if the trigger mode is not * set to level. */ - if (!entry.trigger) { + if (entry.trigger == IOAPIC_EDGE) { entry.trigger = IOAPIC_LEVEL; ioapic_write_entry(apic, pin, entry); } - raw_spin_lock_irqsave(&ioapic_lock, flags); - x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector); + __eoi_ioapic_pin(apic, pin, entry.vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -706,8 +670,8 @@ void mask_ioapic_entries(void) struct IO_APIC_route_entry entry; entry = ioapics[apic].saved_registers[pin]; - if (!entry.mask) { - entry.mask = 1; + if (entry.mask == IOAPIC_UNMASKED) { + entry.mask = IOAPIC_MASKED; ioapic_write_entry(apic, pin, entry); } } @@ -809,11 +773,11 @@ static int EISA_ELCR(unsigned int irq) #endif -/* ISA interrupts are always polarity zero edge triggered, +/* ISA interrupts are always active high edge triggered, * when listed as conforming in the MP table. */ -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) +#define default_ISA_trigger(idx) (IOAPIC_EDGE) +#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH) /* EISA interrupts are always polarity zero and can be edge or level * trigger depending on the ELCR value. If an interrupt is listed as @@ -823,53 +787,55 @@ static int EISA_ELCR(unsigned int irq) #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) -/* PCI interrupts are always polarity one level triggered, +/* PCI interrupts are always active low level triggered, * when listed as conforming in the MP table. */ -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) +#define default_PCI_trigger(idx) (IOAPIC_LEVEL) +#define default_PCI_polarity(idx) (IOAPIC_POL_LOW) static int irq_polarity(int idx) { int bus = mp_irqs[idx].srcbus; - int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - polarity = default_ISA_polarity(idx); - else - polarity = default_PCI_polarity(idx); - break; - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - pr_warn("broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - pr_warn("broken BIOS!!\n"); - polarity = 1; - break; - } + switch (mp_irqs[idx].irqflag & 0x03) { + case 0: + /* conforms to spec, ie. bus-type dependent polarity */ + if (test_bit(bus, mp_bus_not_pci)) + return default_ISA_polarity(idx); + else + return default_PCI_polarity(idx); + case 1: + return IOAPIC_POL_HIGH; + case 2: + pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); + case 3: + default: /* Pointless default required due to do gcc stupidity */ + return IOAPIC_POL_LOW; + } +} + +#ifdef CONFIG_EISA +static int eisa_irq_trigger(int idx, int bus, int trigger) +{ + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_PCI: + case MP_BUS_ISA: + return trigger; + case MP_BUS_EISA: + return default_EISA_trigger(idx); } - return polarity; + pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus); + return IOAPIC_LEVEL; } +#else +static inline int eisa_irq_trigger(int idx, int bus, int trigger) +{ + return trigger; +} +#endif static int irq_trigger(int idx) { @@ -879,153 +845,227 @@ static int irq_trigger(int idx) /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); -#ifdef CONFIG_EISA - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - default: - { - pr_warn("broken BIOS!!\n"); - trigger = 1; - break; - } - } + switch ((mp_irqs[idx].irqflag >> 2) & 0x03) { + case 0: + /* conforms to spec, ie. bus-type dependent trigger mode */ + if (test_bit(bus, mp_bus_not_pci)) + trigger = default_ISA_trigger(idx); + else + trigger = default_PCI_trigger(idx); + /* Take EISA into account */ + return eisa_irq_trigger(idx, bus, trigger); + case 1: + return IOAPIC_EDGE; + case 2: + pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); + case 3: + default: /* Pointless default required due to do gcc stupidity */ + return IOAPIC_LEVEL; + } +} + +void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, + int trigger, int polarity) +{ + init_irq_alloc_info(info, NULL); + info->type = X86_IRQ_ALLOC_TYPE_IOAPIC; + info->ioapic_node = node; + info->ioapic_trigger = trigger; + info->ioapic_polarity = polarity; + info->ioapic_valid = 1; +} + +#ifndef CONFIG_ACPI +int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #endif - break; - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - pr_warn("broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - pr_warn("broken BIOS!!\n"); - trigger = 0; - break; + +static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst, + struct irq_alloc_info *src, + u32 gsi, int ioapic_idx, int pin) +{ + int trigger, polarity; + + copy_irq_alloc_info(dst, src); + dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC; + dst->ioapic_id = mpc_ioapic_id(ioapic_idx); + dst->ioapic_pin = pin; + dst->ioapic_valid = 1; + if (src && src->ioapic_valid) { + dst->ioapic_node = src->ioapic_node; + dst->ioapic_trigger = src->ioapic_trigger; + dst->ioapic_polarity = src->ioapic_polarity; + } else { + dst->ioapic_node = NUMA_NO_NODE; + if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) { + dst->ioapic_trigger = trigger; + dst->ioapic_polarity = polarity; + } else { + /* + * PCI interrupts are always active low level + * triggered. + */ + dst->ioapic_trigger = IOAPIC_LEVEL; + dst->ioapic_polarity = IOAPIC_POL_LOW; } } - return trigger; } -static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin) +static int ioapic_alloc_attr_node(struct irq_alloc_info *info) +{ + return (info && info->ioapic_valid) ? info->ioapic_node : NUMA_NO_NODE; +} + +static void mp_register_handler(unsigned int irq, unsigned long trigger) +{ + irq_flow_handler_t hdl; + bool fasteoi; + + if (trigger) { + irq_set_status_flags(irq, IRQ_LEVEL); + fasteoi = true; + } else { + irq_clear_status_flags(irq, IRQ_LEVEL); + fasteoi = false; + } + + hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; + __irq_set_handler(irq, hdl, 0, fasteoi ? "fasteoi" : "edge"); +} + +static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) { + struct mp_chip_data *data = irq_get_chip_data(irq); + + /* + * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger + * and polarity attirbutes. So allow the first user to reprogram the + * pin with real trigger and polarity attributes. + */ + if (irq < nr_legacy_irqs() && data->count == 1) { + if (info->ioapic_trigger != data->trigger) + mp_register_handler(irq, data->trigger); + data->entry.trigger = data->trigger = info->ioapic_trigger; + data->entry.polarity = data->polarity = info->ioapic_polarity; + } + + return data->trigger == info->ioapic_trigger && + data->polarity == info->ioapic_polarity; +} + +static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, + struct irq_alloc_info *info) +{ + bool legacy = false; int irq = -1; - int ioapic = (int)(long)domain->host_data; int type = ioapics[ioapic].irqdomain_cfg.type; switch (type) { case IOAPIC_DOMAIN_LEGACY: /* - * Dynamically allocate IRQ number for non-ISA IRQs in the first 16 - * GSIs on some weird platforms. + * Dynamically allocate IRQ number for non-ISA IRQs in the first + * 16 GSIs on some weird platforms. */ - if (gsi < nr_legacy_irqs()) - irq = irq_create_mapping(domain, pin); - else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) + if (!ioapic_initialized || gsi >= nr_legacy_irqs()) irq = gsi; + legacy = mp_is_legacy_irq(irq); break; case IOAPIC_DOMAIN_STRICT: - if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) - irq = gsi; + irq = gsi; break; case IOAPIC_DOMAIN_DYNAMIC: - irq = irq_create_mapping(domain, pin); break; default: WARN(1, "ioapic: unknown irqdomain type %d\n", type); - break; + return -1; + } + + return __irq_domain_alloc_irqs(domain, irq, 1, + ioapic_alloc_attr_node(info), + info, legacy); +} + +/* + * Need special handling for ISA IRQs because there may be multiple IOAPIC pins + * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping + * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are + * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). + * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and + * some BIOSes may use MP Interrupt Source records to override IRQ numbers for + * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be + * multiple pins sharing the same legacy IRQ number when ACPI is disabled. + */ +static int alloc_isa_irq_from_domain(struct irq_domain *domain, + int irq, int ioapic, int pin, + struct irq_alloc_info *info) +{ + struct mp_chip_data *data; + struct irq_data *irq_data = irq_get_irq_data(irq); + int node = ioapic_alloc_attr_node(info); + + /* + * Legacy ISA IRQ has already been allocated, just add pin to + * the pin list assoicated with this IRQ and program the IOAPIC + * entry. The IOAPIC entry + */ + if (irq_data && irq_data->parent_data) { + if (!mp_check_pin_attr(irq, info)) + return -EBUSY; + if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic, + info->ioapic_pin)) + return -ENOMEM; + } else { + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); + if (irq >= 0) { + irq_data = irq_domain_get_irq_data(domain, irq); + data = irq_data->chip_data; + data->isa_irq = true; + } } - return irq > 0 ? irq : -1; + return irq; } static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, - unsigned int flags) + unsigned int flags, struct irq_alloc_info *info) { int irq; + bool legacy = false; + struct irq_alloc_info tmp; + struct mp_chip_data *data; struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); - struct mp_pin_info *info = mp_pin_info(ioapic, pin); if (!domain) - return -1; + return -ENOSYS; - mutex_lock(&ioapic_mutex); - - /* - * Don't use irqdomain to manage ISA IRQs because there may be - * multiple IOAPIC pins sharing the same ISA IRQ number and - * irqdomain only supports 1:1 mapping between IOAPIC pin and - * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used - * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). - * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are - * available, and some BIOSes may use MP Interrupt Source records - * to override IRQ numbers for PIRQs instead of reprogramming - * the interrupt routing logic. Thus there may be multiple pins - * sharing the same legacy IRQ number when ACPI is disabled. - */ if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; - if (flags & IOAPIC_MAP_ALLOC) { - if (info->count == 0 && - mp_irqdomain_map(domain, irq, pin) != 0) - irq = -1; + legacy = mp_is_legacy_irq(irq); + } - /* special handling for timer IRQ0 */ + mutex_lock(&ioapic_mutex); + if (!(flags & IOAPIC_MAP_ALLOC)) { + if (!legacy) { + irq = irq_find_mapping(domain, pin); if (irq == 0) - info->count++; + irq = -ENOENT; } } else { - irq = irq_find_mapping(domain, pin); - if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) - irq = alloc_irq_from_domain(domain, gsi, pin); - } - - if (flags & IOAPIC_MAP_ALLOC) { - /* special handling for legacy IRQs */ - if (irq < nr_legacy_irqs() && info->count == 1 && - mp_irqdomain_map(domain, irq, pin) != 0) - irq = -1; - - if (irq > 0) - info->count++; - else if (info->count == 0) - info->set = 0; + ioapic_copy_alloc_attr(&tmp, info, gsi, ioapic, pin); + if (legacy) + irq = alloc_isa_irq_from_domain(domain, irq, + ioapic, pin, &tmp); + else if ((irq = irq_find_mapping(domain, pin)) == 0) + irq = alloc_irq_from_domain(domain, ioapic, gsi, &tmp); + else if (!mp_check_pin_attr(irq, &tmp)) + irq = -EBUSY; + if (irq >= 0) { + data = irq_get_chip_data(irq); + data->count++; + } } - mutex_unlock(&ioapic_mutex); - return irq > 0 ? irq : -1; + return irq; } static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) @@ -1058,10 +1098,10 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) } #endif - return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL); } -int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) +int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) { int ioapic, pin, idx; @@ -1074,31 +1114,24 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) if ((flags & IOAPIC_MAP_CHECK) && idx < 0) return -1; - return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info); } void mp_unmap_irq(int irq) { - struct irq_data *data = irq_get_irq_data(irq); - struct mp_pin_info *info; - int ioapic, pin; + struct irq_data *irq_data = irq_get_irq_data(irq); + struct mp_chip_data *data; - if (!data || !data->domain) + if (!irq_data || !irq_data->domain) return; - ioapic = (int)(long)data->domain->host_data; - pin = (int)data->hwirq; - info = mp_pin_info(ioapic, pin); + data = irq_data->chip_data; + if (!data || data->isa_irq) + return; mutex_lock(&ioapic_mutex); - if (--info->count == 0) { - info->set = 0; - if (irq < nr_legacy_irqs() && - ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY) - mp_irqdomain_unmap(data->domain, irq); - else - irq_dispose_mapping(irq); - } + if (--data->count == 0) + irq_domain_free_irqs(irq, 1); mutex_unlock(&ioapic_mutex); } @@ -1165,7 +1198,7 @@ out: } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -static struct irq_chip ioapic_chip; +static struct irq_chip ioapic_chip, ioapic_ir_chip; #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) @@ -1189,96 +1222,6 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, - unsigned long trigger) -{ - struct irq_chip *chip = &ioapic_chip; - irq_flow_handler_t hdl; - bool fasteoi; - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { - irq_set_status_flags(irq, IRQ_LEVEL); - fasteoi = true; - } else { - irq_clear_status_flags(irq, IRQ_LEVEL); - fasteoi = false; - } - - if (setup_remapped_irq(irq, cfg, chip)) - fasteoi = trigger != 0; - - hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; - irq_set_chip_and_handler_name(irq, chip, hdl, - fasteoi ? "fasteoi" : "edge"); -} - -int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - memset(entry, 0, sizeof(*entry)); - - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->dest = destination; - entry->vector = vector; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* - * Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - -static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, - struct io_apic_irq_attr *attr) -{ - struct IO_APIC_route_entry entry; - unsigned int dest; - - if (!IO_APIC_IRQ(irq)) - return; - - if (assign_irq_vector(irq, cfg, apic->target_cpus())) - return; - - if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), - &dest)) { - pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", - mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - clear_irq_vector(irq, cfg); - - return; - } - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i Dest:%d)\n", - attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, - cfg->vector, irq, attr->trigger, attr->polarity, dest); - - if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - clear_irq_vector(irq, cfg); - - return; - } - - ioapic_register_intr(irq, cfg, attr->trigger); - if (irq < nr_legacy_irqs()) - legacy_pic->mask(irq); - - ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); -} - static void __init setup_IO_APIC_irqs(void) { unsigned int ioapic, pin; @@ -1298,106 +1241,41 @@ static void __init setup_IO_APIC_irqs(void) } } -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, - unsigned int pin, int vector) -{ - struct IO_APIC_route_entry entry; - unsigned int dest; - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), - apic->target_cpus(), &dest))) - dest = BAD_APICID; - - entry.dest_mode = apic->irq_dest_mode; - entry.mask = 0; /* don't mask IRQ for edge */ - entry.dest = dest; - entry.delivery_mode = apic->irq_delivery_mode; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_idx, pin, entry); -} - -void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) +void ioapic_zap_locks(void) { - int i; - - pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); - - for (i = 0; i <= nr_entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - pr_debug(" %02x %02X ", i, entry.dest); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector); - } + raw_spin_lock_init(&ioapic_lock); } -void intel_ir_io_apic_print_entries(unsigned int apic, - unsigned int nr_entries) +static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) { int i; + char buf[256]; + struct IO_APIC_route_entry entry; + struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry; - pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); - + printk(KERN_DEBUG "IOAPIC %d:\n", apic); for (i = 0; i <= nr_entries; i++) { - struct IR_IO_APIC_route_entry *ir_entry; - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, i); - - ir_entry = (struct IR_IO_APIC_route_entry *)&entry; - - pr_debug(" %02x %04X ", i, ir_entry->index); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %X %02X\n", - ir_entry->format, - ir_entry->mask, - ir_entry->trigger, - ir_entry->irr, - ir_entry->polarity, - ir_entry->delivery_status, - ir_entry->index2, - ir_entry->zero, - ir_entry->vector); + snprintf(buf, sizeof(buf), + " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", + i, + entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ", + entry.trigger == IOAPIC_LEVEL ? "level" : "edge ", + entry.polarity == IOAPIC_POL_LOW ? "low " : "high", + entry.vector, entry.irr, entry.delivery_status); + if (ir_entry->format) + printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", + buf, (ir_entry->index << 15) | ir_entry->index, + ir_entry->zero); + else + printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", + buf, + entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ? + "logical " : "physical", + entry.dest, entry.delivery_mode); } } -void ioapic_zap_locks(void) -{ - raw_spin_lock_init(&ioapic_lock); -} - static void __init print_IO_APIC(int ioapic_idx) { union IO_APIC_reg_00 reg_00; @@ -1451,16 +1329,13 @@ static void __init print_IO_APIC(int ioapic_idx) } printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); + io_apic_print_entries(ioapic_idx, reg_01.bits.entries); } void __init print_IO_APICs(void) { int ioapic_idx; - struct irq_cfg *cfg; unsigned int irq; - struct irq_chip *chip; printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for_each_ioapic(ioapic_idx) @@ -1480,18 +1355,20 @@ void __init print_IO_APICs(void) printk(KERN_DEBUG "IRQ to pin mappings:\n"); for_each_active_irq(irq) { struct irq_pin_list *entry; + struct irq_chip *chip; + struct mp_chip_data *data; chip = irq_get_chip(irq); - if (chip != &ioapic_chip) + if (chip != &ioapic_chip && chip != &ioapic_ir_chip) continue; - - cfg = irq_cfg(irq); - if (!cfg) + data = irq_get_chip_data(irq); + if (!data) continue; - if (list_empty(&cfg->irq_2_pin)) + if (list_empty(&data->irq_2_pin)) continue; + printk(KERN_DEBUG "IRQ%d ", irq); - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) pr_cont("-> %d:%d", entry->apic, entry->pin); pr_cont("\n"); } @@ -1564,15 +1441,12 @@ void native_disable_io_apic(void) struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); + entry.mask = IOAPIC_UNMASKED; + entry.trigger = IOAPIC_EDGE; + entry.polarity = IOAPIC_POL_HIGH; + entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; + entry.delivery_mode = dest_ExtINT; + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: @@ -1582,7 +1456,6 @@ void native_disable_io_apic(void) if (cpu_has_apic || apic_from_smp_config()) disconnect_bsp_APIC(ioapic_i8259.pin != -1); - } /* @@ -1792,7 +1665,6 @@ static int __init timer_irq_works(void) * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... */ - static unsigned int startup_ioapic_irq(struct irq_data *data) { int was_pending = 0, irq = data->irq; @@ -1804,74 +1676,22 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) if (legacy_pic->irq_pending(irq)) was_pending = 1; } - __unmask_ioapic(irqd_cfg(data)); + __unmask_ioapic(data->chip_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - - apic = entry->apic; - pin = entry->pin; - - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - } -} - -int native_ioapic_set_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force) -{ - unsigned int dest, irq = data->irq; - unsigned long flags; - int ret; - - if (!config_enabled(CONFIG_SMP)) - return -EPERM; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = apic_set_affinity(data, mask, &dest); - if (!ret) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, irqd_cfg(data)); - ret = IRQ_SET_MASK_OK_NOCOPY; - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return ret; -} - atomic_t irq_mis_count; #ifdef CONFIG_GENERIC_PENDING_IRQ -static bool io_apic_level_ack_pending(struct irq_cfg *cfg) +static bool io_apic_level_ack_pending(struct mp_chip_data *data) { struct irq_pin_list *entry; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, data->irq_2_pin) { unsigned int reg; int pin; @@ -1888,18 +1708,17 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) return false; } -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +static inline bool ioapic_irqd_mask(struct irq_data *data) { /* If we are moving the irq we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic(cfg); + mask_ioapic_irq(data); return true; } return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) +static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) { if (unlikely(masked)) { /* Only migrate the irq if the ack has been received. @@ -1928,31 +1747,30 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - if (!io_apic_level_ack_pending(cfg)) + if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic(cfg); + unmask_ioapic_irq(data); } } #else -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +static inline bool ioapic_irqd_mask(struct irq_data *data) { return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) +static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) { } #endif -static void ack_ioapic_level(struct irq_data *data) +static void ioapic_ack_level(struct irq_data *irq_data) { - struct irq_cfg *cfg = irqd_cfg(data); - int i, irq = data->irq; + struct irq_cfg *cfg = irqd_cfg(irq_data); unsigned long v; bool masked; + int i; irq_complete_move(cfg); - masked = ioapic_irqd_mask(data, cfg); + masked = ioapic_irqd_mask(irq_data); /* * It appears there is an erratum which affects at least version 0x11 @@ -2004,11 +1822,49 @@ static void ack_ioapic_level(struct irq_data *data) */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); + eoi_ioapic_pin(cfg->vector, irq_data->chip_data); + } + + ioapic_irqd_unmask(irq_data, masked); +} + +static void ioapic_ir_ack_level(struct irq_data *irq_data) +{ + struct mp_chip_data *data = irq_data->chip_data; + + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + ack_APIC_irq(); + eoi_ioapic_pin(data->entry.vector, data); +} - eoi_ioapic_irq(irq, cfg); +static int ioapic_set_affinity(struct irq_data *irq_data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = irq_data->parent_data; + struct mp_chip_data *data = irq_data->chip_data; + struct irq_pin_list *entry; + struct irq_cfg *cfg; + unsigned long flags; + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + raw_spin_lock_irqsave(&ioapic_lock, flags); + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { + cfg = irqd_cfg(irq_data); + data->entry.dest = cfg->dest_apicid; + data->entry.vector = cfg->vector; + for_each_irq_pin(entry, data->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, + data->entry); } + raw_spin_unlock_irqrestore(&ioapic_lock, flags); - ioapic_irqd_unmask(data, cfg, masked); + return ret; } static struct irq_chip ioapic_chip __read_mostly = { @@ -2016,10 +1872,20 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_startup = startup_ioapic_irq, .irq_mask = mask_ioapic_irq, .irq_unmask = unmask_ioapic_irq, - .irq_ack = apic_ack_edge, - .irq_eoi = ack_ioapic_level, - .irq_set_affinity = native_ioapic_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_ack = irq_chip_ack_parent, + .irq_eoi = ioapic_ack_level, + .irq_set_affinity = ioapic_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static struct irq_chip ioapic_ir_chip __read_mostly = { + .name = "IR-IO-APIC", + .irq_startup = startup_ioapic_irq, + .irq_mask = mask_ioapic_irq, + .irq_unmask = unmask_ioapic_irq, + .irq_ack = irq_chip_ack_parent, + .irq_eoi = ioapic_ir_ack_level, + .irq_set_affinity = ioapic_set_affinity, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2113,12 +1979,12 @@ static inline void __init unlock_ExtINT_logic(void) memset(&entry1, 0, sizeof(entry1)); - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ + entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; + entry1.mask = IOAPIC_UNMASKED; entry1.dest = hard_smp_processor_id(); entry1.delivery_mode = dest_ExtINT; entry1.polarity = entry0.polarity; - entry1.trigger = 0; + entry1.trigger = IOAPIC_EDGE; entry1.vector = 0; ioapic_write_entry(apic, pin, entry1); @@ -2152,6 +2018,25 @@ static int __init disable_timer_pin_setup(char *arg) } early_param("disable_timer_pin_1", disable_timer_pin_setup); +static int mp_alloc_timer_irq(int ioapic, int pin) +{ + int irq = -1; + struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); + + if (domain) { + struct irq_alloc_info info; + + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0); + info.ioapic_id = mpc_ioapic_id(ioapic); + info.ioapic_pin = pin; + mutex_lock(&ioapic_mutex); + irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info); + mutex_unlock(&ioapic_mutex); + } + + return irq; +} + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2162,7 +2047,9 @@ early_param("disable_timer_pin_1", disable_timer_pin_setup); */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_data *irq_data = irq_get_irq_data(0); + struct mp_chip_data *data = irq_data->chip_data; + struct irq_cfg *cfg = irqd_cfg(irq_data); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; unsigned long flags; @@ -2174,7 +2061,6 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ legacy_pic->mask(0); - assign_irq_vector(0, cfg, apic->target_cpus()); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2215,23 +2101,21 @@ static inline void __init check_timer(void) } if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ + /* Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_node(cfg, node, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + mp_alloc_timer_irq(apic1, pin1); } else { - /* for edge trigger, setup_ioapic_irq already - * leave it unmasked. + /* + * for edge trigger, it's already unmasked, * so only need to unmask if it is level-trigger * do we really have level trigger timer? */ int idx; idx = find_irq_entry(apic1, pin1, mp_INT); if (idx != -1 && irq_trigger(idx)) - unmask_ioapic(cfg); + unmask_ioapic_irq(irq_get_chip_data(0)); } + irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); @@ -2251,8 +2135,8 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); + replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2329,36 +2213,35 @@ out: static int mp_irqdomain_create(int ioapic) { - size_t size; + struct irq_alloc_info info; + struct irq_domain *parent; int hwirqs = mp_ioapic_pin_count(ioapic); struct ioapic *ip = &ioapics[ioapic]; struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); - size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic); - ip->pin_info = kzalloc(size, GFP_KERNEL); - if (!ip->pin_info) - return -ENOMEM; - if (cfg->type == IOAPIC_DOMAIN_INVALID) return 0; + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_IOAPIC; + info.ioapic_id = mpc_ioapic_id(ioapic); + parent = irq_remapping_get_ir_irq_domain(&info); + if (!parent) + parent = x86_vector_domain; + ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops, (void *)(long)ioapic); - if(!ip->irqdomain) { - kfree(ip->pin_info); - ip->pin_info = NULL; + if (!ip->irqdomain) return -ENOMEM; - } + + ip->irqdomain->parent = parent; if (cfg->type == IOAPIC_DOMAIN_LEGACY || cfg->type == IOAPIC_DOMAIN_STRICT) ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1); - if (gsi_cfg->gsi_base == 0) - irq_set_default_host(ip->irqdomain); - return 0; } @@ -2368,8 +2251,6 @@ static void ioapic_destroy_irqdomain(int idx) irq_domain_remove(ioapics[idx].irqdomain); ioapics[idx].irqdomain = NULL; } - kfree(ioapics[idx].pin_info); - ioapics[idx].pin_info = NULL; } void __init setup_IO_APIC(void) @@ -2399,20 +2280,6 @@ void __init setup_IO_APIC(void) ioapic_initialized = 1; } -/* - * Called after all the initialization is done. If we didn't find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - static void resume_ioapic_id(int ioapic_idx) { unsigned long flags; @@ -2451,20 +2318,6 @@ static int __init ioapic_init_ops(void) device_initcall(ioapic_init_ops); -static int -io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) -{ - struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); - int ret; - - if (!cfg) - return -EINVAL; - ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); - if (!ret) - setup_ioapic_irq(irq, cfg, attr); - return ret; -} - static int io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; @@ -2692,7 +2545,7 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - x86_io_apic_ops.set_affinity(idata, mask, false); + irq_set_affinity(irq, mask); } } @@ -2737,7 +2590,7 @@ static struct resource * __init ioapic_setup_resources(void) return res; } -void __init native_io_apic_init_mappings(void) +void __init io_apic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; @@ -2962,7 +2815,6 @@ int mp_unregister_ioapic(u32 gsi_base) { int ioapic, pin; int found = 0; - struct mp_pin_info *pin_info; for_each_ioapic(ioapic) if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) { @@ -2975,11 +2827,17 @@ int mp_unregister_ioapic(u32 gsi_base) } for_each_pin(ioapic, pin) { - pin_info = mp_pin_info(ioapic, pin); - if (pin_info->count) { - pr_warn("pin%d on IOAPIC%d is still in use.\n", - pin, ioapic); - return -EBUSY; + u32 gsi = mp_pin_to_gsi(ioapic, pin); + int irq = mp_map_gsi_to_irq(gsi, 0, NULL); + struct mp_chip_data *data; + + if (irq >= 0) { + data = irq_get_chip_data(irq); + if (data && data->count) { + pr_warn("pin%d on IOAPIC%d is still in use.\n", + pin, ioapic); + return -EBUSY; + } } } @@ -3006,108 +2864,141 @@ int mp_ioapic_registered(u32 gsi_base) return 0; } -static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, - int ioapic, int ioapic_pin, - int trigger, int polarity) +static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, + struct irq_alloc_info *info) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + if (info && info->ioapic_valid) { + data->trigger = info->ioapic_trigger; + data->polarity = info->ioapic_polarity; + } else if (acpi_get_override_irq(gsi, &data->trigger, + &data->polarity) < 0) { + /* PCI interrupts are always active low level triggered. */ + data->trigger = IOAPIC_LEVEL; + data->polarity = IOAPIC_POL_LOW; + } } -int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq) +static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data, + struct IO_APIC_route_entry *entry) { - int ioapic = (int)(long)domain->host_data; - struct mp_pin_info *info = mp_pin_info(ioapic, hwirq); - struct io_apic_irq_attr attr; + memset(entry, 0, sizeof(*entry)); + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + entry->trigger = data->trigger; + entry->polarity = data->polarity; + /* + * Mask level triggered irqs. Edge triggered irqs are masked + * by the irq core code in case they fire. + */ + if (data->trigger == IOAPIC_LEVEL) + entry->mask = IOAPIC_MASKED; + else + entry->mask = IOAPIC_UNMASKED; +} - /* Get default attribute if not set by caller yet */ - if (!info->set) { - u32 gsi = mp_pin_to_gsi(ioapic, hwirq); +int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int ret, ioapic, pin; + struct irq_cfg *cfg; + struct irq_data *irq_data; + struct mp_chip_data *data; + struct irq_alloc_info *info = arg; - if (acpi_get_override_irq(gsi, &info->trigger, - &info->polarity) < 0) { - /* - * PCI interrupts are always polarity one level - * triggered. - */ - info->trigger = 1; - info->polarity = 1; - } - info->node = NUMA_NO_NODE; + if (!info || nr_irqs > 1) + return -EINVAL; + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) + return -EINVAL; - /* - * setup_IO_APIC_irqs() programs all legacy IRQs with default - * trigger and polarity attributes. Don't set the flag for that - * case so the first legacy IRQ user could reprogram the pin - * with real trigger and polarity attributes. - */ - if (virq >= nr_legacy_irqs() || info->count) - info->set = 1; - } - set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger, - info->polarity); + ioapic = mp_irqdomain_ioapic_idx(domain); + pin = info->ioapic_pin; + if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) + return -EEXIST; - return io_apic_setup_irq_pin(virq, info->node, &attr); -} + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; -void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) -{ - struct irq_data *data = irq_get_irq_data(virq); - struct irq_cfg *cfg = irq_cfg(virq); - int ioapic = (int)(long)domain->host_data; - int pin = (int)data->hwirq; + info->ioapic_entry = &data->entry; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); + if (ret < 0) { + kfree(data); + return ret; + } + + INIT_LIST_HEAD(&data->irq_2_pin); + irq_data->hwirq = info->ioapic_pin; + irq_data->chip = (domain->parent == x86_vector_domain) ? + &ioapic_chip : &ioapic_ir_chip; + irq_data->chip_data = data; + mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); + + cfg = irqd_cfg(irq_data); + add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); + if (info->ioapic_entry) + mp_setup_entry(cfg, data, info->ioapic_entry); + mp_register_handler(virq, data->trigger); + if (virq < nr_legacy_irqs()) + legacy_pic->mask(virq); + + apic_printk(APIC_VERBOSE, KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n", + ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector, + virq, data->trigger, data->polarity, cfg->dest_apicid); - ioapic_mask_entry(ioapic, pin); - __remove_pin_from_irq(cfg, ioapic, pin); - WARN_ON(!list_empty(&cfg->irq_2_pin)); - arch_teardown_hwirq(virq); + return 0; } -int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) +void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - int ret = 0; - int ioapic, pin; - struct mp_pin_info *info; + struct irq_data *irq_data; + struct mp_chip_data *data; - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return -ENODEV; - - pin = mp_find_ioapic_pin(ioapic, gsi); - info = mp_pin_info(ioapic, pin); - trigger = trigger ? 1 : 0; - polarity = polarity ? 1 : 0; - - mutex_lock(&ioapic_mutex); - if (!info->set) { - info->trigger = trigger; - info->polarity = polarity; - info->node = node; - info->set = 1; - } else if (info->trigger != trigger || info->polarity != polarity) { - ret = -EBUSY; + BUG_ON(nr_irqs != 1); + irq_data = irq_domain_get_irq_data(domain, virq); + if (irq_data && irq_data->chip_data) { + data = irq_data->chip_data; + __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), + (int)irq_data->hwirq); + WARN_ON(!list_empty(&data->irq_2_pin)); + kfree(irq_data->chip_data); } - mutex_unlock(&ioapic_mutex); - - return ret; + irq_domain_free_irqs_top(domain, virq, nr_irqs); } -/* Enable IOAPIC early just for system timer */ -void __init pre_init_apic_IRQ0(void) +void mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data) { - struct io_apic_irq_attr attr = { 0, 0, 0, 0 }; + unsigned long flags; + struct irq_pin_list *entry; + struct mp_chip_data *data = irq_data->chip_data; - printk(KERN_INFO "Early APIC setup for system timer0\n"); -#ifndef CONFIG_SMP - physid_set_mask_of_physid(boot_cpu_physical_apicid, - &phys_cpu_present_map); -#endif - setup_local_APIC(); + raw_spin_lock_irqsave(&ioapic_lock, flags); + for_each_irq_pin(entry, data->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, data->entry); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); +} - io_apic_setup_irq_pin(0, 0, &attr); - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); +void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + /* It won't be called for IRQ with multiple IOAPIC pins associated */ + ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), + (int)irq_data->hwirq); +} + +int mp_irqdomain_ioapic_idx(struct irq_domain *domain) +{ + return (int)(long)domain->host_data; } + +const struct irq_domain_ops mp_ioapic_irqdomain_ops = { + .alloc = mp_irqdomain_alloc, + .free = mp_irqdomain_free, + .activate = mp_irqdomain_activate, + .deactivate = mp_irqdomain_deactivate, +}; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index d6ba2d660dc5..1a9d735e09c6 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu <jiang.liu@linux.intel.com> + * Convert to hierarchical irqdomain * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,22 +16,23 @@ #include <linux/dmar.h> #include <linux/hpet.h> #include <linux/msi.h> +#include <asm/irqdomain.h> #include <asm/msidef.h> #include <asm/hpet.h> #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/irq_remapping.h> -void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) +static struct irq_domain *msi_default_domain; + +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_cfg *cfg = irqd_cfg(data); msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) - msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); msg->address_lo = MSI_ADDR_BASE_LO | @@ -39,7 +42,7 @@ void native_compose_msi_msg(struct pci_dev *pdev, ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU : MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + MSI_ADDR_DEST_ID(cfg->dest_apicid); msg->data = MSI_DATA_TRIGGER_EDGE | @@ -50,180 +53,201 @@ void native_compose_msi_msg(struct pci_dev *pdev, MSI_DATA_VECTOR(cfg->vector); } -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. + */ +static struct irq_chip pci_msi_controller = { + .name = "PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - struct irq_cfg *cfg; - int err; - unsigned dest; + struct irq_domain *domain; + struct irq_alloc_info info; - if (disable_apic) - return -ENXIO; + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_MSI; + info.msi_dev = dev; - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; + domain = irq_remapping_get_irq_domain(&info); + if (domain == NULL) + domain = msi_default_domain; + if (domain == NULL) + return -ENOSYS; - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; + return pci_msi_domain_alloc_irqs(domain, dev, nvec, type); +} - x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); +void native_teardown_msi_irq(unsigned int irq) +{ + irq_domain_free_irqs(irq, 1); +} - return 0; +static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->msi_hwirq; } -static int -msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) +static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) { - struct irq_cfg *cfg = irqd_cfg(data); - struct msi_msg msg; - unsigned int dest; - int ret; + struct pci_dev *pdev = to_pci_dev(dev); + struct msi_desc *desc = first_pci_msi_entry(pdev); + + init_irq_alloc_info(arg, NULL); + arg->msi_dev = pdev; + if (desc->msi_attrib.is_msix) { + arg->type = X86_IRQ_ALLOC_TYPE_MSIX; + } else { + arg->type = X86_IRQ_ALLOC_TYPE_MSI; + arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; + } - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; + return 0; +} - __get_cached_msi_msg(data->msi_desc, &msg); +static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc); +} + +static struct msi_domain_ops pci_msi_domain_ops = { + .get_hwirq = pci_msi_get_hwirq, + .msi_prepare = pci_msi_prepare, + .set_desc = pci_msi_set_desc, +}; - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); +static struct msi_domain_info pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &pci_msi_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; - __pci_write_msi_msg(data->msi_desc, &msg); +void arch_init_msi_domain(struct irq_domain *parent) +{ + if (disable_apic) + return; - return IRQ_SET_MASK_OK_NOCOPY; + msi_default_domain = pci_msi_create_irq_domain(NULL, + &pci_msi_domain_info, parent); + if (!msi_default_domain) + pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); } -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", +#ifdef CONFIG_IRQ_REMAP +static struct irq_chip pci_msi_ir_controller = { + .name = "IR-PCI-MSI", .irq_unmask = pci_msi_unmask_irq, .irq_mask = pci_msi_mask_irq, - .irq_ack = apic_ack_edge, - .irq_set_affinity = msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, .flags = IRQCHIP_SKIP_SET_WAKE, }; -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) -{ - struct irq_chip *chip = &msi_chip; - struct msi_msg msg; - unsigned int irq = irq_base + irq_offset; - int ret; - - ret = msi_compose_msg(dev, irq, &msg, -1); - if (ret < 0) - return ret; - - irq_set_msi_desc_off(irq_base, irq_offset, msidesc); - - /* - * MSI-X message is written per-IRQ, the offset is always 0. - * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. - */ - if (!irq_offset) - pci_write_msi_msg(irq, &msg); +static struct msi_domain_info pci_msi_ir_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &pci_msi_ir_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; - setup_remapped_irq(irq, irq_cfg(irq), chip); +struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent) +{ + return pci_msi_create_irq_domain(NULL, &pci_msi_ir_domain_info, parent); +} +#endif - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); +#ifdef CONFIG_DMAR_TABLE +static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg) +{ + dmar_msi_write(data->irq, msg); +} - dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq); +static struct irq_chip dmar_msi_controller = { + .name = "DMAR-MSI", + .irq_unmask = dmar_msi_unmask, + .irq_mask = dmar_msi_mask, + .irq_ack = irq_chip_ack_parent, + .irq_set_affinity = msi_domain_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = dmar_msi_write_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; - return 0; +static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->dmar_id; } -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int dmar_msi_init(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq, + irq_hw_number_t hwirq, msi_alloc_info_t *arg) { - struct msi_desc *msidesc; - unsigned int irq; - int node, ret; + irq_domain_set_info(domain, virq, arg->dmar_id, info->chip, NULL, + handle_edge_irq, arg->dmar_data, "edge"); - /* Multiple MSI vectors only supported with interrupt remapping */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; + return 0; +} - node = dev_to_node(&dev->dev); +static struct msi_domain_ops dmar_msi_domain_ops = { + .get_hwirq = dmar_msi_get_hwirq, + .msi_init = dmar_msi_init, +}; - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = irq_alloc_hwirq(node); - if (!irq) - return -ENOSPC; +static struct msi_domain_info dmar_msi_domain_info = { + .ops = &dmar_msi_domain_ops, + .chip = &dmar_msi_controller, +}; - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) { - irq_free_hwirq(irq); - return ret; - } +static struct irq_domain *dmar_get_irq_domain(void) +{ + static struct irq_domain *dmar_domain; + static DEFINE_MUTEX(dmar_lock); - } - return 0; -} + mutex_lock(&dmar_lock); + if (dmar_domain == NULL) + dmar_domain = msi_create_irq_domain(NULL, &dmar_msi_domain_info, + x86_vector_domain); + mutex_unlock(&dmar_lock); -void native_teardown_msi_irq(unsigned int irq) -{ - irq_free_hwirq(irq); + return dmar_domain; } -#ifdef CONFIG_DMAR_TABLE -static int -dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) +int dmar_alloc_hwirq(int id, int node, void *arg) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest, irq = data->irq; - struct msi_msg msg; - int ret; - - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; + struct irq_domain *domain = dmar_get_irq_domain(); + struct irq_alloc_info info; - dmar_msi_read(irq, &msg); + if (!domain) + return -1; - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_DMAR; + info.dmar_id = id; + info.dmar_data = arg; - dmar_msi_write(irq, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; + return irq_domain_alloc_irqs(domain, 1, node, &info); } -static struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .irq_unmask = dmar_msi_unmask, - .irq_mask = dmar_msi_mask, - .irq_ack = apic_ack_edge, - .irq_set_affinity = dmar_msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int arch_setup_dmar_msi(unsigned int irq) +void dmar_free_hwirq(int irq) { - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg, -1); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; + irq_domain_free_irqs(irq, 1); } #endif @@ -231,56 +255,103 @@ int arch_setup_dmar_msi(unsigned int irq) * MSI message composition */ #ifdef CONFIG_HPET_TIMER +static inline int hpet_dev_id(struct irq_domain *domain) +{ + struct msi_domain_info *info = msi_get_domain_info(domain); + + return (int)(long)info->data; +} -static int hpet_msi_set_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) +static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); - struct msi_msg msg; - unsigned int dest; - int ret; + hpet_msi_write(data->handler_data, msg); +} - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; +static struct irq_chip hpet_msi_controller = { + .name = "HPET-MSI", + .irq_unmask = hpet_msi_unmask, + .irq_mask = hpet_msi_mask, + .irq_ack = irq_chip_ack_parent, + .irq_set_affinity = msi_domain_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = hpet_msi_write_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; - hpet_msi_read(data->handler_data, &msg); +static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->hpet_index; +} - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); +static int hpet_msi_init(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq, + irq_hw_number_t hwirq, msi_alloc_info_t *arg) +{ + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_domain_set_info(domain, virq, arg->hpet_index, info->chip, NULL, + handle_edge_irq, arg->hpet_data, "edge"); - hpet_msi_write(data->handler_data, &msg); + return 0; +} - return IRQ_SET_MASK_OK_NOCOPY; +static void hpet_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq) +{ + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); } -static struct irq_chip hpet_msi_type = { - .name = "HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, - .irq_ack = apic_ack_edge, - .irq_set_affinity = hpet_msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, +static struct msi_domain_ops hpet_msi_domain_ops = { + .get_hwirq = hpet_msi_get_hwirq, + .msi_init = hpet_msi_init, + .msi_free = hpet_msi_free, +}; + +static struct msi_domain_info hpet_msi_domain_info = { + .ops = &hpet_msi_domain_ops, + .chip = &hpet_msi_controller, }; -int default_setup_hpet_msi(unsigned int irq, unsigned int id) +struct irq_domain *hpet_create_irq_domain(int hpet_id) { - struct irq_chip *chip = &hpet_msi_type; - struct msi_msg msg; - int ret; + struct irq_domain *parent; + struct irq_alloc_info info; + struct msi_domain_info *domain_info; + + if (x86_vector_domain == NULL) + return NULL; + + domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); + if (!domain_info) + return NULL; + + *domain_info = hpet_msi_domain_info; + domain_info->data = (void *)(long)hpet_id; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_HPET; + info.hpet_id = hpet_id; + parent = irq_remapping_get_ir_irq_domain(&info); + if (parent == NULL) + parent = x86_vector_domain; + else + hpet_msi_controller.name = "IR-HPET-MSI"; + + return msi_create_irq_domain(NULL, domain_info, parent); +} - ret = msi_compose_msg(NULL, irq, &msg, id); - if (ret < 0) - return ret; +int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, + int dev_num) +{ + struct irq_alloc_info info; - hpet_msi_write(irq_get_handler_data(irq), &msg); - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - setup_remapped_irq(irq, irq_cfg(irq), chip); + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_HPET; + info.hpet_data = dev; + info.hpet_id = hpet_dev_id(domain); + info.hpet_index = dev_num; - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - return 0; + return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); } #endif diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6cedd7914581..28eba2d38b15 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu <jiang.liu@linux.intel.com> + * Enable support of hierarchical irqdomains * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -11,15 +13,28 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/compiler.h> -#include <linux/irqdomain.h> #include <linux/slab.h> +#include <asm/irqdomain.h> #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/i8259.h> #include <asm/desc.h> #include <asm/irq_remapping.h> +struct apic_chip_data { + struct irq_cfg cfg; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 move_in_progress : 1; +}; + +struct irq_domain *x86_vector_domain; static DEFINE_RAW_SPINLOCK(vector_lock); +static cpumask_var_t vector_cpumask; +static struct irq_chip lapic_controller; +#ifdef CONFIG_X86_IO_APIC +static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; +#endif void lock_vector_lock(void) { @@ -34,71 +49,59 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } -struct irq_cfg *irq_cfg(unsigned int irq) +static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data) { - return irq_get_chip_data(irq); + if (!irq_data) + return NULL; + + while (irq_data->parent_data) + irq_data = irq_data->parent_data; + + return irq_data->chip_data; } struct irq_cfg *irqd_cfg(struct irq_data *irq_data) { - return irq_data->chip_data; + struct apic_chip_data *data = apic_chip_data(irq_data); + + return data ? &data->cfg : NULL; } -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) +struct irq_cfg *irq_cfg(unsigned int irq) { - struct irq_cfg *cfg; + return irqd_cfg(irq_get_irq_data(irq)); +} - cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); - if (!cfg) +static struct apic_chip_data *alloc_apic_chip_data(int node) +{ + struct apic_chip_data *data; + + data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); + if (!data) return NULL; - if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) - goto out_cfg; - if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) + if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node)) + goto out_data; + if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node)) goto out_domain; -#ifdef CONFIG_X86_IO_APIC - INIT_LIST_HEAD(&cfg->irq_2_pin); -#endif - return cfg; + return data; out_domain: - free_cpumask_var(cfg->domain); -out_cfg: - kfree(cfg); + free_cpumask_var(data->domain); +out_data: + kfree(data); return NULL; } -struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) +static void free_apic_chip_data(struct apic_chip_data *data) { - int res = irq_alloc_desc_at(at, node); - struct irq_cfg *cfg; - - if (res < 0) { - if (res != -EEXIST) - return NULL; - cfg = irq_cfg(at); - if (cfg) - return cfg; + if (data) { + free_cpumask_var(data->domain); + free_cpumask_var(data->old_domain); + kfree(data); } - - cfg = alloc_irq_cfg(at, node); - if (cfg) - irq_set_chip_data(at, cfg); - else - irq_free_desc(at); - return cfg; -} - -static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) -{ - if (!cfg) - return; - irq_set_chip_data(at, NULL); - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); } -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int __assign_irq_vector(int irq, struct apic_chip_data *d, + const struct cpumask *mask) { /* * NOTE! The local APIC isn't very good at handling @@ -114,36 +117,33 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; int cpu, err; - cpumask_var_t tmp_mask; - if (cfg->move_in_progress) + if (d->move_in_progress) return -EBUSY; - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; - /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; - cpumask_clear(cfg->old_domain); + cpumask_clear(d->old_domain); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; - apic->vector_allocation_domain(cpu, tmp_mask, mask); + apic->vector_allocation_domain(cpu, vector_cpumask, mask); - if (cpumask_subset(tmp_mask, cfg->domain)) { + if (cpumask_subset(vector_cpumask, d->domain)) { err = 0; - if (cpumask_equal(tmp_mask, cfg->domain)) + if (cpumask_equal(vector_cpumask, d->domain)) break; /* * New cpumask using the vector is a proper subset of * the current in use mask. So cleanup the vector * allocation for the members that are not used anymore. */ - cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); - cpumask_and(cfg->domain, cfg->domain, tmp_mask); + cpumask_andnot(d->old_domain, d->domain, + vector_cpumask); + d->move_in_progress = + cpumask_intersects(d->old_domain, cpu_online_mask); + cpumask_and(d->domain, d->domain, vector_cpumask); break; } @@ -157,16 +157,18 @@ next: } if (unlikely(current_vector == vector)) { - cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); - cpumask_andnot(tmp_mask, mask, cfg->old_domain); - cpu = cpumask_first_and(tmp_mask, cpu_online_mask); + cpumask_or(d->old_domain, d->old_domain, + vector_cpumask); + cpumask_andnot(vector_cpumask, mask, d->old_domain); + cpu = cpumask_first_and(vector_cpumask, + cpu_online_mask); continue; } if (test_bit(vector, used_vectors)) goto next; - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { + for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) { if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED) goto next; @@ -174,55 +176,73 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (cfg->vector) { - cpumask_copy(cfg->old_domain, cfg->domain); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); + if (d->cfg.vector) { + cpumask_copy(d->old_domain, d->domain); + d->move_in_progress = + cpumask_intersects(d->old_domain, cpu_online_mask); } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); + d->cfg.vector = vector; + cpumask_copy(d->domain, vector_cpumask); err = 0; break; } - free_cpumask_var(tmp_mask); + + if (!err) { + /* cache destination APIC IDs into cfg->dest_apicid */ + err = apic->cpu_mask_to_apicid_and(mask, d->domain, + &d->cfg.dest_apicid); + } return err; } -int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int assign_irq_vector(int irq, struct apic_chip_data *data, + const struct cpumask *mask) { int err; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, cfg, mask); + err = __assign_irq_vector(irq, data, mask); raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } -void clear_irq_vector(int irq, struct irq_cfg *cfg) +static int assign_irq_vector_policy(int irq, int node, + struct apic_chip_data *data, + struct irq_alloc_info *info) +{ + if (info && info->mask) + return assign_irq_vector(irq, data, info->mask); + if (node != NUMA_NO_NODE && + assign_irq_vector(irq, data, cpumask_of_node(node)) == 0) + return 0; + return assign_irq_vector(irq, data, apic->target_cpus()); +} + +static void clear_irq_vector(int irq, struct apic_chip_data *data) { int cpu, vector; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - BUG_ON(!cfg->vector); + BUG_ON(!data->cfg.vector); - vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + vector = data->cfg.vector; + for_each_cpu_and(cpu, data->domain, cpu_online_mask) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - cfg->vector = 0; - cpumask_clear(cfg->domain); + data->cfg.vector = 0; + cpumask_clear(data->domain); - if (likely(!cfg->move_in_progress)) { + if (likely(!data->move_in_progress)) { raw_spin_unlock_irqrestore(&vector_lock, flags); return; } - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -231,10 +251,95 @@ void clear_irq_vector(int irq, struct irq_cfg *cfg) break; } } - cfg->move_in_progress = 0; + data->move_in_progress = 0; raw_spin_unlock_irqrestore(&vector_lock, flags); } +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) +{ + memset(info, 0, sizeof(*info)); + info->mask = mask; +} + +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +{ + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); +} + +static void x86_vector_free_irqs(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *irq_data; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irq_data && irq_data->chip_data) { + clear_irq_vector(virq + i, irq_data->chip_data); + free_apic_chip_data(irq_data->chip_data); +#ifdef CONFIG_X86_IO_APIC + if (virq + i < nr_legacy_irqs()) + legacy_irq_data[virq + i] = NULL; +#endif + irq_domain_reset_irq_data(irq_data); + } + } +} + +static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + struct apic_chip_data *data; + struct irq_data *irq_data; + int i, err; + + if (disable_apic) + return -ENXIO; + + /* Currently vector allocator can't guarantee contiguous allocations */ + if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) + return -ENOSYS; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irq_data); +#ifdef CONFIG_X86_IO_APIC + if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) + data = legacy_irq_data[virq + i]; + else +#endif + data = alloc_apic_chip_data(irq_data->node); + if (!data) { + err = -ENOMEM; + goto error; + } + + irq_data->chip = &lapic_controller; + irq_data->chip_data = data; + irq_data->hwirq = virq + i; + err = assign_irq_vector_policy(virq, irq_data->node, data, + info); + if (err) + goto error; + } + + return 0; + +error: + x86_vector_free_irqs(domain, virq, i + 1); + return err; +} + +static const struct irq_domain_ops x86_vector_domain_ops = { + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, +}; + int __init arch_probe_nr_irqs(void) { int nr; @@ -258,8 +363,43 @@ int __init arch_probe_nr_irqs(void) return nr_legacy_irqs(); } +#ifdef CONFIG_X86_IO_APIC +static void init_legacy_irqs(void) +{ + int i, node = cpu_to_node(0); + struct apic_chip_data *data; + + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * ISA_IRQ_VECTOR(i) for all cpu's. + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + data = legacy_irq_data[i] = alloc_apic_chip_data(node); + BUG_ON(!data); + + data->cfg.vector = ISA_IRQ_VECTOR(i); + cpumask_setall(data->domain); + irq_set_chip_data(i, data); + } +} +#else +static void init_legacy_irqs(void) { } +#endif + int __init arch_early_irq_init(void) { + init_legacy_irqs(); + + x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops, + NULL); + BUG_ON(x86_vector_domain == NULL); + irq_set_default_host(x86_vector_domain); + + arch_init_msi_domain(x86_vector_domain); + arch_init_htirq_domain(x86_vector_domain); + + BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + return arch_early_ioapic_init(); } @@ -267,7 +407,7 @@ static void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ int irq, vector; - struct irq_cfg *cfg; + struct apic_chip_data *data; /* * vector_lock will make sure that we don't run into irq vector @@ -277,13 +417,13 @@ static void __setup_vector_irq(int cpu) raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { - cfg = irq_cfg(irq); - if (!cfg) + data = apic_chip_data(irq_get_irq_data(irq)); + if (!data) continue; - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, data->domain)) continue; - vector = cfg->vector; + vector = data->cfg.vector; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ @@ -292,8 +432,8 @@ static void __setup_vector_irq(int cpu) if (irq <= VECTOR_UNDEFINED) continue; - cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) + data = apic_chip_data(irq_get_irq_data(irq)); + if (!cpumask_test_cpu(cpu, data->domain)) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } raw_spin_unlock(&vector_lock); @@ -314,20 +454,20 @@ void setup_vector_irq(int cpu) * legacy vector to irq mapping: */ for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq; __setup_vector_irq(cpu); } -int apic_retrigger_irq(struct irq_data *data) +static int apic_retrigger_irq(struct irq_data *irq_data) { - struct irq_cfg *cfg = irqd_cfg(data); + struct apic_chip_data *data = apic_chip_data(irq_data); unsigned long flags; int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(cfg->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); + cpu = cpumask_first_and(data->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -340,73 +480,76 @@ void apic_ack_edge(struct irq_data *data) ack_APIC_irq(); } -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) +static int apic_set_affinity(struct irq_data *irq_data, + const struct cpumask *dest, bool force) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int irq = data->irq; - int err; + struct apic_chip_data *data = irq_data->chip_data; + int err, irq = irq_data->irq; if (!config_enabled(CONFIG_SMP)) return -EPERM; - if (!cpumask_intersects(mask, cpu_online_mask)) + if (!cpumask_intersects(dest, cpu_online_mask)) return -EINVAL; - err = assign_irq_vector(irq, cfg, mask); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + err = assign_irq_vector(irq, data, dest); if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) + struct irq_data *top = irq_get_irq_data(irq); + + if (assign_irq_vector(irq, data, top->affinity)) pr_err("Failed to recover vector for irq %d\n", irq); return err; } - cpumask_copy(data->affinity, mask); - - return 0; + return IRQ_SET_MASK_OK; } +static struct irq_chip lapic_controller = { + .irq_ack = apic_ack_edge, + .irq_set_affinity = apic_set_affinity, + .irq_retrigger = apic_retrigger_irq, +}; + #ifdef CONFIG_SMP -void send_cleanup_vector(struct irq_cfg *cfg) +static void __send_cleanup_vector(struct apic_chip_data *data) { cpumask_var_t cleanup_mask; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + for_each_cpu_and(i, data->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } - cfg->move_in_progress = 0; + data->move_in_progress = 0; +} + +void send_cleanup_vector(struct irq_cfg *cfg) +{ + struct apic_chip_data *data; + + data = container_of(cfg, struct apic_chip_data, cfg); + if (data->move_in_progress) + __send_cleanup_vector(data); } asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; - ack_APIC_irq(); - irq_enter(); - exit_idle(); + entering_ack_irq(); me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { int irq; unsigned int irr; struct irq_desc *desc; - struct irq_cfg *cfg; + struct apic_chip_data *data; irq = __this_cpu_read(vector_irq[vector]); @@ -417,8 +560,8 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) if (!desc) continue; - cfg = irq_cfg(irq); - if (!cfg) + data = apic_chip_data(&desc->irq_data); + if (!data) continue; raw_spin_lock(&desc->lock); @@ -427,10 +570,11 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) * Check if the irq migration is in progress. If so, we * haven't received the cleanup request yet for this irq. */ - if (cfg->move_in_progress) + if (data->move_in_progress) goto unlock; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + if (vector == data->cfg.vector && + cpumask_test_cpu(me, data->domain)) goto unlock; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); @@ -450,20 +594,21 @@ unlock: raw_spin_unlock(&desc->lock); } - irq_exit(); + exiting_irq(); } static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { unsigned me; + struct apic_chip_data *data; - if (likely(!cfg->move_in_progress)) + data = container_of(cfg, struct apic_chip_data, cfg); + if (likely(!data->move_in_progress)) return; me = smp_processor_id(); - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); + if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) + __send_cleanup_vector(data); } void irq_complete_move(struct irq_cfg *cfg) @@ -475,46 +620,11 @@ void irq_force_complete_move(int irq) { struct irq_cfg *cfg = irq_cfg(irq); - if (!cfg) - return; - - __irq_complete_move(cfg, cfg->vector); + if (cfg) + __irq_complete_move(cfg, cfg->vector); } #endif -/* - * Dynamic irq allocate and deallocation. Should be replaced by irq domains! - */ -int arch_setup_hwirq(unsigned int irq, int node) -{ - struct irq_cfg *cfg; - unsigned long flags; - int ret; - - cfg = alloc_irq_cfg(irq, node); - if (!cfg) - return -ENOMEM; - - raw_spin_lock_irqsave(&vector_lock, flags); - ret = __assign_irq_vector(irq, cfg, apic->target_cpus()); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - if (!ret) - irq_set_chip_data(irq, cfg); - else - free_irq_cfg(irq, cfg); - return ret; -} - -void arch_teardown_hwirq(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - free_remapped_irq(irq); - clear_irq_vector(irq, cfg); - free_irq_cfg(irq, cfg); -} - static void __init print_APIC_field(int base) { int i; diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6fae733e9194..3ffd925655e0 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -21,11 +21,13 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static bool x2apic_fadt_phys(void) { +#ifdef CONFIG_ACPI if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { printk(KERN_DEBUG "System requires x2apic physical mode\n"); return true; } +#endif return false; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9f6b9341950f..8e3d22a1af94 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -41,6 +41,25 @@ void common(void) { OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + BLANK(); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); + + BLANK(); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); + + BLANK(); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); +#endif + #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); @@ -49,7 +68,9 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); +#ifdef CONFIG_X86_32 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); +#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 47703aed74cf..6ce39025f467 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -17,17 +17,6 @@ void foo(void); void foo(void) { - OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); - BLANK(); - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); @@ -37,10 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - OFFSET(PT_EBX, pt_regs, bx); OFFSET(PT_ECX, pt_regs, cx); OFFSET(PT_EDX, pt_regs, dx); @@ -60,9 +45,6 @@ void foo(void) OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - OFFSET(saved_context_gdt_desc, saved_context, gdt_desc); BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5ce6f2da8763..dcaab87da629 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -29,27 +29,6 @@ int main(void) BLANK(); #endif -#ifdef CONFIG_IA32_EMULATION - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - BLANK(); - -#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) - ENTRY(ax); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(si); - ENTRY(di); - ENTRY(bp); - ENTRY(sp); - ENTRY(ip); - BLANK(); -#undef ENTRY - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); - BLANK(); -#endif - #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(cx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8a..6bec0b55863e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1155,10 +1155,6 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..20190bdac9d5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { int i, ret = 0; + char *tmp; for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); @@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); } - if (mce_severity(m, mca_cfg.tolerant, msg, true) >= - MCE_PANIC_SEVERITY) + + if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + *msg = tmp; ret = 1; + } } return ret; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 939155ffdece..aad4bd84b475 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -39,14 +39,12 @@ void hyperv_vector_handler(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - irq_enter(); - exit_idle(); - + entering_irq(); inc_irq_stat(irq_hv_callback_count); if (vmbus_handler) vmbus_handler(); - irq_exit(); + exiting_irq(); set_irq_regs(old_regs); } diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 6367a780cc8c..5ee771859b6f 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -4,7 +4,6 @@ #include <linux/bootmem.h> #include <linux/export.h> #include <linux/io.h> -#include <linux/irqdomain.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/of.h> @@ -17,6 +16,7 @@ #include <linux/of_pci.h> #include <linux/initrd.h> +#include <asm/irqdomain.h> #include <asm/hpet.h> #include <asm/apic.h> #include <asm/pci_x86.h> @@ -196,38 +196,31 @@ static struct of_ioapic_type of_ioapic_type[] = }, }; -static int ioapic_xlate(struct irq_domain *domain, - struct device_node *controller, - const u32 *intspec, u32 intsize, - irq_hw_number_t *out_hwirq, u32 *out_type) +static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { + struct of_phandle_args *irq_data = (void *)arg; struct of_ioapic_type *it; - u32 line, idx, gsi; + struct irq_alloc_info tmp; - if (WARN_ON(intsize < 2)) + if (WARN_ON(irq_data->args_count < 2)) return -EINVAL; - - line = intspec[0]; - - if (intspec[1] >= ARRAY_SIZE(of_ioapic_type)) + if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type)) return -EINVAL; - it = &of_ioapic_type[intspec[1]]; + it = &of_ioapic_type[irq_data->args[1]]; + ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity); + tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain)); + tmp.ioapic_pin = irq_data->args[0]; - idx = (u32)(long)domain->host_data; - gsi = mp_pin_to_gsi(idx, line); - if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0))) - return -EBUSY; - - *out_hwirq = line; - *out_type = it->out_type; - return 0; + return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp); } -const struct irq_domain_ops ioapic_irq_domain_ops = { - .map = mp_irqdomain_map, - .unmap = mp_irqdomain_unmap, - .xlate = ioapic_xlate, +static const struct irq_domain_ops ioapic_irq_domain_ops = { + .alloc = dt_irqdomain_alloc, + .free = mp_irqdomain_free, + .activate = mp_irqdomain_activate, + .deactivate = mp_irqdomain_deactivate, }; static void __init dtb_add_ioapic(struct device_node *dn) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 02c2eff7478d..22aadc917868 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ @@ -419,26 +419,27 @@ syscall_return: * a completely clean 64-bit userspace context. */ movq RCX(%rsp),%rcx - cmpq %rcx,RIP(%rsp) /* RCX == RIP */ + movq RIP(%rsp),%r11 + cmpq %rcx,%r11 /* RCX == RIP */ jne opportunistic_sysret_failed /* * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP * in kernel space. This essentially lets the user take over - * the kernel, since userspace controls RSP. It's not worth - * testing for canonicalness exactly -- this check detects any - * of the 17 high bits set, which is true for non-canonical - * or kernel addresses. (This will pessimize vsyscall=native. - * Big deal.) + * the kernel, since userspace controls RSP. * - * If virtual addresses ever become wider, this will need + * If width of "canonical tail" ever becomes variable, this will need * to be updated to remain correct on both old and new CPUs. */ .ifne __VIRTUAL_MASK_SHIFT - 47 .error "virtual address width changed -- SYSRET checks need update" .endif - shr $__VIRTUAL_MASK_SHIFT, %rcx - jnz opportunistic_sysret_failed + /* Change top 16 bits to be the sign-extension of 47th bit */ + shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + /* If this changed %rcx, it was not canonical */ + cmpq %rcx, %r11 + jne opportunistic_sysret_failed cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ jne opportunistic_sysret_failed @@ -475,8 +476,8 @@ syscall_return: */ syscall_return_via_sysret: CFI_REMEMBER_STATE - /* r11 is already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_R11 + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp),%rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -533,40 +534,27 @@ GLOBAL(stub_execveat) CFI_ENDPROC END(stub_execveat) -#ifdef CONFIG_X86_X32_ABI +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) .align 8 GLOBAL(stub_x32_execve) +GLOBAL(stub32_execve) CFI_STARTPROC DEFAULT_FRAME 0, 8 call compat_sys_execve jmp return_from_execve CFI_ENDPROC +END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 - call compat_sys_execveat - jmp return_from_execve - CFI_ENDPROC -END(stub_x32_execveat) -#endif - -#ifdef CONFIG_IA32_EMULATION - .align 8 -GLOBAL(stub32_execve) - CFI_STARTPROC - call compat_sys_execve - jmp return_from_execve - CFI_ENDPROC -END(stub32_execve) - .align 8 GLOBAL(stub32_execveat) CFI_STARTPROC + DEFAULT_FRAME 0, 8 call compat_sys_execveat jmp return_from_execve CFI_ENDPROC END(stub32_execveat) +END(stub_x32_execveat) #endif /* @@ -622,7 +610,7 @@ ENTRY(ret_from_fork) RESTORE_EXTRA_REGS - testl $3,CS(%rsp) # from kernel_thread? + testb $3, CS(%rsp) # from kernel_thread? /* * By the time we get here, we have no idea whether our pt_regs, @@ -686,8 +674,8 @@ END(irq_entries_start) leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ - testl $3, CS-RBP(%rsp) - je 1f + testb $3, CS-RBP(%rsp) + jz 1f SWAPGS 1: /* @@ -741,8 +729,8 @@ ret_from_intr: CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET RBP - testl $3,CS(%rsp) - je retint_kernel + testb $3, CS(%rsp) + jz retint_kernel /* Interrupt came from user space */ GET_THREAD_INFO(%rcx) @@ -928,6 +916,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ #ifdef CONFIG_HAVE_KVM apicinterrupt3 POSTED_INTR_VECTOR \ kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \ + kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi #endif #ifdef CONFIG_X86_MCE_THRESHOLD @@ -989,7 +979,7 @@ ENTRY(\sym) .if \paranoid .if \paranoid == 1 CFI_REMEMBER_STATE - testl $3, CS(%rsp) /* If coming from userspace, switch */ + testb $3, CS(%rsp) /* If coming from userspace, switch */ jnz 1f /* stacks. */ .endif call paranoid_entry @@ -1202,17 +1192,17 @@ ENTRY(xen_failsafe_callback) /*CFI_REL_OFFSET ds,DS*/ CFI_REL_OFFSET r11,8 CFI_REL_OFFSET rcx,0 - movw %ds,%cx + movl %ds,%ecx cmpw %cx,0x10(%rsp) CFI_REMEMBER_STATE jne 1f - movw %es,%cx + movl %es,%ecx cmpw %cx,0x18(%rsp) jne 1f - movw %fs,%cx + movl %fs,%ecx cmpw %cx,0x20(%rsp) jne 1f - movw %gs,%cx + movl %gs,%ecx cmpw %cx,0x28(%rsp) jne 1f /* All segments match their saved values => Category 2 (Bad IRET). */ @@ -1330,8 +1320,8 @@ ENTRY(error_entry) SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 xorl %ebx,%ebx - testl $3,CS+8(%rsp) - je error_kernelspace + testb $3, CS+8(%rsp) + jz error_kernelspace error_swapgs: SWAPGS error_sti: @@ -1382,7 +1372,7 @@ ENTRY(error_exit) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax - jne retint_kernel + jnz retint_kernel LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi @@ -1627,7 +1617,6 @@ end_repeat_nmi: je 1f movq %r12, %cr2 1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d031bad9e07e..02d257256200 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -547,7 +547,7 @@ ENTRY(early_idt_handler) cld cmpl $2,(%esp) # X86_TRAP_NMI - je is_nmi # Ignore NMI + je .Lis_nmi # Ignore NMI cmpl $2,%ss:early_recursion_flag je hlt_loop @@ -600,7 +600,7 @@ ex_entry: pop %ecx pop %eax decl %ss:early_recursion_flag -is_nmi: +.Lis_nmi: addl $8,%esp /* drop vector number and error code */ iret ENDPROC(early_idt_handler) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ae6588b301c2..43eafc8afb69 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -344,7 +344,7 @@ ENTRY(early_idt_handler) cld cmpl $2,(%rsp) # X86_TRAP_NMI - je is_nmi # Ignore NMI + je .Lis_nmi # Ignore NMI cmpl $2,early_recursion_flag(%rip) jz 1f @@ -409,7 +409,7 @@ ENTRY(early_idt_handler) popq %rcx popq %rax decl early_recursion_flag(%rip) -is_nmi: +.Lis_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN ENDPROC(early_idt_handler) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3acbff4716b0..e2449cf38b06 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -12,6 +12,7 @@ #include <linux/pm.h> #include <linux/io.h> +#include <asm/irqdomain.h> #include <asm/fixmap.h> #include <asm/hpet.h> #include <asm/time.h> @@ -305,8 +306,6 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } -static int hpet_setup_msi_irq(unsigned int irq); - static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { @@ -357,7 +356,7 @@ static void hpet_set_mode(enum clock_event_mode mode, hpet_enable_legacy_int(); } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - hpet_setup_msi_irq(hdev->irq); + irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); @@ -423,6 +422,7 @@ static int hpet_legacy_next_event(unsigned long delta, static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); static struct hpet_dev *hpet_devs; +static struct irq_domain *hpet_domain; void hpet_msi_unmask(struct irq_data *data) { @@ -473,31 +473,6 @@ static int hpet_msi_next_event(unsigned long delta, return hpet_next_event(delta, evt, hdev->num); } -static int hpet_setup_msi_irq(unsigned int irq) -{ - if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { - irq_free_hwirq(irq); - return -EINVAL; - } - return 0; -} - -static int hpet_assign_irq(struct hpet_dev *dev) -{ - unsigned int irq = irq_alloc_hwirq(-1); - - if (!irq) - return -EINVAL; - - irq_set_handler_data(irq, dev); - - if (hpet_setup_msi_irq(irq)) - return -EINVAL; - - dev->irq = irq; - return 0; -} - static irqreturn_t hpet_interrupt_handler(int irq, void *data) { struct hpet_dev *dev = (struct hpet_dev *)data; @@ -540,9 +515,6 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) if (!(hdev->flags & HPET_DEV_VALID)) return; - if (hpet_setup_msi_irq(hdev->irq)) - return; - hdev->cpu = cpu; per_cpu(cpu_hpet_dev, cpu) = hdev; evt->name = hdev->name; @@ -574,7 +546,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int id; unsigned int num_timers; unsigned int num_timers_used = 0; - int i; + int i, irq; if (hpet_msi_disable) return; @@ -587,6 +559,10 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) num_timers++; /* Value read out starts from 0 */ hpet_print_config(); + hpet_domain = hpet_create_irq_domain(hpet_blockid); + if (!hpet_domain) + return; + hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); if (!hpet_devs) return; @@ -601,15 +577,16 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) if (!(cfg & HPET_TN_FSB_CAP)) continue; + irq = hpet_assign_irq(hpet_domain, hdev, hdev->num); + if (irq < 0) + continue; + + sprintf(hdev->name, "hpet%d", i); + hdev->num = i; + hdev->irq = irq; hdev->flags = 0; if (cfg & HPET_TN_PERIODIC_CAP) hdev->flags |= HPET_DEV_PERI_CAP; - hdev->num = i; - - sprintf(hdev->name, "hpet%d", i); - if (hpet_assign_irq(hdev)) - continue; - hdev->flags |= HPET_DEV_FSB_CAP; hdev->flags |= HPET_DEV_VALID; num_timers_used++; @@ -709,10 +686,6 @@ static int hpet_cpuhp_notify(struct notifier_block *n, } #else -static int hpet_setup_msi_irq(unsigned int irq) -{ - return 0; -} static void hpet_msi_capability_lookup(unsigned int start_timer) { return; diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 009183276bb7..6185d3141219 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -173,6 +173,21 @@ static void init_thread_xstate(void) xstate_size = sizeof(struct i387_fxsave_struct); else xstate_size = sizeof(struct i387_fsave_struct); + + /* + * Quirk: we don't yet handle the XSAVES* instructions + * correctly, as we don't correctly convert between + * standard and compacted format when interfacing + * with user-space - so disable it for now. + * + * The difference is small: with recent CPUs the + * compacted format is only marginally smaller than + * the standard FPU state format. + * + * ( This is easy to backport while we are fixing + * XSAVES* support. ) + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); } /* diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index e7cc5370cd2f..16cb827a5b27 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -329,8 +329,8 @@ static void init_8259A(int auto_eoi) */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */ + outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); @@ -342,8 +342,8 @@ static void init_8259A(int auto_eoi) outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */ + outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e5952c225532..7e10c8b4b318 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -22,6 +22,12 @@ #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + atomic_t irq_err_count; /* Function pointer for generic interrupt vector handling */ @@ -136,6 +142,18 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); #endif +#ifdef CONFIG_HAVE_KVM + seq_printf(p, "%*s: ", prec, "PIN"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); + seq_puts(p, " Posted-interrupt notification event\n"); + + seq_printf(p, "%*s: ", prec, "PIW"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->kvm_posted_intr_wakeup_ipis); + seq_puts(p, " Posted-interrupt wakeup event\n"); +#endif return 0; } @@ -192,8 +210,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) unsigned vector = ~regs->orig_ax; unsigned irq; - irq_enter(); - exit_idle(); + entering_irq(); irq = __this_cpu_read(vector_irq[vector]); @@ -209,7 +226,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } } - irq_exit(); + exiting_irq(); set_irq_regs(old_regs); return 1; @@ -237,6 +254,18 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs) } #ifdef CONFIG_HAVE_KVM +static void dummy_handler(void) {} +static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; + +void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) +{ + if (handler) + kvm_posted_intr_wakeup_handler = handler; + else + kvm_posted_intr_wakeup_handler = dummy_handler; +} +EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); + /* * Handler for POSTED_INTERRUPT_VECTOR. */ @@ -244,16 +273,23 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - ack_APIC_irq(); - - irq_enter(); - - exit_idle(); - + entering_ack_irq(); inc_irq_stat(kvm_posted_intr_ipis); + exiting_irq(); + set_irq_regs(old_regs); +} - irq_exit(); +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. + */ +__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + entering_ack_irq(); + inc_irq_stat(kvm_posted_intr_wakeup_ipis); + kvm_posted_intr_wakeup_handler(); + exiting_irq(); set_irq_regs(old_regs); } #endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f9fd86a7fcc7..cd74f5978ab9 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -21,12 +21,6 @@ #include <asm/apic.h> -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - #ifdef CONFIG_DEBUG_STACKOVERFLOW int sysctl_panic_on_stackoverflow __read_mostly; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 394e643d7830..bc4604e500a3 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -20,12 +20,6 @@ #include <asm/idle.h> #include <asm/apic.h> -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - int sysctl_panic_on_stackoverflow; /* diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 15d741ddfeeb..dc5fa6a1e8d6 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -10,12 +10,6 @@ #include <asm/apic.h> #include <asm/trace/irq_vectors.h> -static inline void irq_work_entering_irq(void) -{ - irq_enter(); - ack_APIC_irq(); -} - static inline void __smp_irq_work_interrupt(void) { inc_irq_stat(apic_irq_work_irqs); @@ -24,14 +18,14 @@ static inline void __smp_irq_work_interrupt(void) __visible void smp_irq_work_interrupt(struct pt_regs *regs) { - irq_work_entering_irq(); + ipi_entering_ack_irq(); __smp_irq_work_interrupt(); exiting_irq(); } __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) { - irq_work_entering_irq(); + ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); __smp_irq_work_interrupt(); trace_irq_work_exit(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index cd10a6437264..680723a8e4b6 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -86,7 +86,7 @@ void __init init_IRQ(void) int i; /* - * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15. * If these IRQ's are handled by legacy interrupt-controllers like PIC, * then this configuration will likely be static after the boot. If * these IRQ's are handled by more mordern controllers like IO-APIC, @@ -94,7 +94,7 @@ void __init init_IRQ(void) * irq's migrate etc. */ for (i = 0; i < nr_legacy_irqs(); i++) - per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i; x86_init.irqs.intr_init(); } @@ -144,6 +144,8 @@ static void __init apic_intr_init(void) #ifdef CONFIG_HAVE_KVM /* IPI for KVM to deliver posted interrupt */ alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); + /* IPI for KVM to deliver interrupt to wake up tasks */ + alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); #endif /* IPI vectors for APIC spurious and error interrupts */ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 2d2a237f2c73..30ca7607cbbb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -19,8 +19,8 @@ #include <linux/module.h> #include <linux/smp.h> #include <linux/pci.h> -#include <linux/irqdomain.h> +#include <asm/irqdomain.h> #include <asm/mtrr.h> #include <asm/mpspec.h> #include <asm/pgalloc.h> @@ -113,11 +113,6 @@ static void __init MP_bus_info(struct mpc_bus *m) pr_warn("Unknown bustype %s - ignoring\n", str); } -static struct irq_domain_ops mp_ioapic_irqdomain_ops = { - .map = mp_irqdomain_map, - .unmap = mp_irqdomain_unmap, -}; - static void __init MP_ioapic_info(struct mpc_ioapic *m) { struct ioapic_domain_cfg cfg = { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c614dd492f5f..58bcfb67c01f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || +#ifdef CONFIG_X86_32 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || +#endif type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#if defined(CONFIG_X86_32) .irq_enable_sysexit = native_irq_enable_sysexit, #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..0de21c62c348 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..a99900cedc22 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -302,13 +302,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0, kernel_stack, and current_top_of_stack. This changes + * Reload esp0 and cpu_current_top_of_stack. This changes * current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..82134506faa8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -409,9 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload esp0 and ss1. This changes current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + THREAD_SIZE); - /* * Now maybe reload the debug registers and handle I/O bitmaps */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d74ac33290ae..8d04a7594a03 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1222,8 +1222,7 @@ void __init setup_arch(char **cmdline_p) init_cpu_to_node(); init_apic_mappings(); - if (x86_io_apic_ops.init) - x86_io_apic_ops.init(); + io_apic_init_mappings(); kvm_guest_init(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index be8e1bde07aa..15aaa69bbb5e 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -170,8 +170,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) asmlinkage __visible void smp_reboot_interrupt(void) { - ack_APIC_irq(); - irq_enter(); + ipi_entering_ack_irq(); stop_this_cpu(NULL); irq_exit(); } @@ -265,12 +264,6 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs) */ } -static inline void smp_entering_irq(void) -{ - ack_APIC_irq(); - irq_enter(); -} - __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* @@ -279,7 +272,7 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) * scheduler_ipi(). This is OK, since those functions are allowed * to nest. */ - smp_entering_irq(); + ipi_entering_ack_irq(); trace_reschedule_entry(RESCHEDULE_VECTOR); __smp_reschedule_interrupt(); trace_reschedule_exit(RESCHEDULE_VECTOR); @@ -297,14 +290,14 @@ static inline void __smp_call_function_interrupt(void) __visible void smp_call_function_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); __smp_call_function_interrupt(); exiting_irq(); } __visible void smp_trace_call_function_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); __smp_call_function_interrupt(); trace_call_function_exit(CALL_FUNCTION_VECTOR); @@ -319,14 +312,14 @@ static inline void __smp_call_function_single_interrupt(void) __visible void smp_call_function_single_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } __visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); __smp_call_function_single_interrupt(); trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547eac8cd..fd6291c921b6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -514,6 +514,40 @@ void __inquire_remote_apic(int apicid) } /* + * The Multiprocessor Specification 1.4 (1997) example code suggests + * that there should be a 10ms delay between the BSP asserting INIT + * and de-asserting INIT, when starting a remote processor. + * But that slows boot and resume on modern processors, which include + * many cores and don't require that delay. + * + * Cmdline "init_cpu_udelay=" is available to over-ride this delay. + * Modern processor families are quirked to remove the delay entirely. + */ +#define UDELAY_10MS_DEFAULT 10000 + +static unsigned int init_udelay = UDELAY_10MS_DEFAULT; + +static int __init cpu_init_udelay(char *str) +{ + get_option(&str, &init_udelay); + + return 0; +} +early_param("cpu_init_udelay", cpu_init_udelay); + +static void __init smp_quirk_init_udelay(void) +{ + /* if cmdline changed it from default, leave it alone */ + if (init_udelay != UDELAY_10MS_DEFAULT) + return; + + /* if modern processor, use no delay */ + if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || + ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) + init_udelay = 0; +} + +/* * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. @@ -555,7 +589,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { - unsigned long send_status, accept_status = 0; + unsigned long send_status = 0, accept_status = 0; int maxlvt, num_starts, j; maxlvt = lapic_get_maxlvt(); @@ -583,7 +617,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); - mdelay(10); + udelay(init_udelay); pr_debug("Deasserting INIT\n"); @@ -651,6 +685,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); accept_status = (apic_read(APIC_ESR) & 0xEF); @@ -792,8 +827,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; } /* @@ -1176,6 +1209,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) uv_system_init(); set_mtrr_aps_delayed_init(); + + smp_quirk_init_udelay(); } void arch_enable_nonboot_cpus_begin(void) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..5e0791f9d3dc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -997,8 +997,8 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - set_bit(SYSCALL_VECTOR, used_vectors); + set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); + set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 234b0722de53..3cee10abf01d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -111,11 +111,9 @@ EXPORT_SYMBOL_GPL(x86_platform); #if defined(CONFIG_PCI_MSI) struct x86_msi_ops x86_msi = { .setup_msi_irqs = native_setup_msi_irqs, - .compose_msi_msg = native_compose_msi_msg, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, .restore_msi_irqs = default_restore_msi_irqs, - .setup_hpet_msi = default_setup_hpet_msi, }; /* MSI arch specific hooks */ @@ -141,13 +139,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev) #endif struct x86_io_apic_ops x86_io_apic_ops = { - .init = native_io_apic_init_mappings, .read = native_io_apic_read, - .write = native_io_apic_write, - .modify = native_io_apic_modify, .disable = native_disable_io_apic, - .print_entries = native_io_apic_print_entries, - .set_affinity = native_ioapic_set_affinity, - .setup_entry = native_setup_ioapic_entry, - .eoi_ioapic_pin = native_eoi_ioapic_pin, }; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..cab9aaa7802c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -90,7 +90,7 @@ struct lguest_data lguest_data = { .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ - .syscall_vec = SYSCALL_VECTOR, + .syscall_vec = IA32_SYSCALL_VECTOR, }; /*G:037 @@ -866,7 +866,7 @@ static void __init lguest_init_IRQ(void) for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); - if (i != SYSCALL_VECTOR) + if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR)); } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 1530afb07c85..982989d282ff 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -40,6 +40,6 @@ else lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o - lib-y += copy_user_64.o copy_user_nocache_64.o + lib-y += copy_user_64.o lib-y += cmpxchg16b_emu.o endif diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index fa997dfaef24..e4b3beee83bd 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -16,30 +16,6 @@ #include <asm/asm.h> #include <asm/smap.h> - .macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(100b,103b) - _ASM_EXTABLE(101b,103b) - .endm - /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) CFI_STARTPROC @@ -266,3 +242,95 @@ ENTRY(copy_user_enhanced_fast_string) _ASM_EXTABLE(1b,12b) CFI_ENDPROC ENDPROC(copy_user_enhanced_fast_string) + +/* + * copy_user_nocache - Uncached memory copy with exception handling + * This will force destination/source out of cache for more performance. + */ +ENTRY(__copy_user_nocache) + CFI_STARTPROC + ASM_STAC + cmpl $8,%edx + jb 20f /* less then 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx + jz 17f +1: movq (%rsi),%r8 +2: movq 1*8(%rsi),%r9 +3: movq 2*8(%rsi),%r10 +4: movq 3*8(%rsi),%r11 +5: movnti %r8,(%rdi) +6: movnti %r9,1*8(%rdi) +7: movnti %r10,2*8(%rdi) +8: movnti %r11,3*8(%rdi) +9: movq 4*8(%rsi),%r8 +10: movq 5*8(%rsi),%r9 +11: movq 6*8(%rsi),%r10 +12: movq 7*8(%rsi),%r11 +13: movnti %r8,4*8(%rdi) +14: movnti %r9,5*8(%rdi) +15: movnti %r10,6*8(%rdi) +16: movnti %r11,7*8(%rdi) + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + decl %ecx + jnz 1b +17: movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +18: movq (%rsi),%r8 +19: movnti %r8,(%rdi) + leaq 8(%rsi),%rsi + leaq 8(%rdi),%rdi + decl %ecx + jnz 18b +20: andl %edx,%edx + jz 23f + movl %edx,%ecx +21: movb (%rsi),%al +22: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 21b +23: xorl %eax,%eax + ASM_CLAC + sfence + ret + + .section .fixup,"ax" +30: shll $6,%ecx + addl %ecx,%edx + jmp 60f +40: lea (%rdx,%rcx,8),%rdx + jmp 60f +50: movl %ecx,%edx +60: sfence + jmp copy_user_handle_tail + .previous + + _ASM_EXTABLE(1b,30b) + _ASM_EXTABLE(2b,30b) + _ASM_EXTABLE(3b,30b) + _ASM_EXTABLE(4b,30b) + _ASM_EXTABLE(5b,30b) + _ASM_EXTABLE(6b,30b) + _ASM_EXTABLE(7b,30b) + _ASM_EXTABLE(8b,30b) + _ASM_EXTABLE(9b,30b) + _ASM_EXTABLE(10b,30b) + _ASM_EXTABLE(11b,30b) + _ASM_EXTABLE(12b,30b) + _ASM_EXTABLE(13b,30b) + _ASM_EXTABLE(14b,30b) + _ASM_EXTABLE(15b,30b) + _ASM_EXTABLE(16b,30b) + _ASM_EXTABLE(18b,40b) + _ASM_EXTABLE(19b,40b) + _ASM_EXTABLE(21b,50b) + _ASM_EXTABLE(22b,50b) + CFI_ENDPROC +ENDPROC(__copy_user_nocache) diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S deleted file mode 100644 index 6a4f43c2d9e6..000000000000 --- a/arch/x86/lib/copy_user_nocache_64.S +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> - * Copyright 2002 Andi Kleen, SuSE Labs. - * Subject to the GNU Public License v2. - * - * Functions to copy from and to user space. - */ - -#include <linux/linkage.h> -#include <asm/dwarf2.h> - -#define FIX_ALIGNMENT 1 - -#include <asm/current.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/asm.h> -#include <asm/smap.h> - - .macro ALIGN_DESTINATION -#ifdef FIX_ALIGNMENT - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(100b,103b) - _ASM_EXTABLE(101b,103b) -#endif - .endm - -/* - * copy_user_nocache - Uncached memory copy with exception handling - * This will force destination/source out of cache for more performance. - */ -ENTRY(__copy_user_nocache) - CFI_STARTPROC - ASM_STAC - cmpl $8,%edx - jb 20f /* less then 8 bytes, go to byte copy loop */ - ALIGN_DESTINATION - movl %edx,%ecx - andl $63,%edx - shrl $6,%ecx - jz 17f -1: movq (%rsi),%r8 -2: movq 1*8(%rsi),%r9 -3: movq 2*8(%rsi),%r10 -4: movq 3*8(%rsi),%r11 -5: movnti %r8,(%rdi) -6: movnti %r9,1*8(%rdi) -7: movnti %r10,2*8(%rdi) -8: movnti %r11,3*8(%rdi) -9: movq 4*8(%rsi),%r8 -10: movq 5*8(%rsi),%r9 -11: movq 6*8(%rsi),%r10 -12: movq 7*8(%rsi),%r11 -13: movnti %r8,4*8(%rdi) -14: movnti %r9,5*8(%rdi) -15: movnti %r10,6*8(%rdi) -16: movnti %r11,7*8(%rdi) - leaq 64(%rsi),%rsi - leaq 64(%rdi),%rdi - decl %ecx - jnz 1b -17: movl %edx,%ecx - andl $7,%edx - shrl $3,%ecx - jz 20f -18: movq (%rsi),%r8 -19: movnti %r8,(%rdi) - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi - decl %ecx - jnz 18b -20: andl %edx,%edx - jz 23f - movl %edx,%ecx -21: movb (%rsi),%al -22: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 21b -23: xorl %eax,%eax - ASM_CLAC - sfence - ret - - .section .fixup,"ax" -30: shll $6,%ecx - addl %ecx,%edx - jmp 60f -40: lea (%rdx,%rcx,8),%rdx - jmp 60f -50: movl %ecx,%edx -60: sfence - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(1b,30b) - _ASM_EXTABLE(2b,30b) - _ASM_EXTABLE(3b,30b) - _ASM_EXTABLE(4b,30b) - _ASM_EXTABLE(5b,30b) - _ASM_EXTABLE(6b,30b) - _ASM_EXTABLE(7b,30b) - _ASM_EXTABLE(8b,30b) - _ASM_EXTABLE(9b,30b) - _ASM_EXTABLE(10b,30b) - _ASM_EXTABLE(11b,30b) - _ASM_EXTABLE(12b,30b) - _ASM_EXTABLE(13b,30b) - _ASM_EXTABLE(14b,30b) - _ASM_EXTABLE(15b,30b) - _ASM_EXTABLE(16b,30b) - _ASM_EXTABLE(18b,40b) - _ASM_EXTABLE(19b,40b) - _ASM_EXTABLE(21b,50b) - _ASM_EXTABLE(22b,50b) - CFI_ENDPROC -ENDPROC(__copy_user_nocache) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 99f76103c6b7..ddeff4844a10 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -966,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog) } ctx.cleanup_addr = proglen; - for (pass = 0; pass < 10; pass++) { + /* JITed image shrinks with every pass and the loop iterates + * until the image stops shrinking. Very large bpf programs + * may converge on the last pass. In such case do one more + * pass to emit the final image + */ + for (pass = 0; pass < 10 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d93963340c3c..14a63ed6fe09 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -482,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_sysdata *sd = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, sd->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_sysdata *sd = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, sd->companion); + } return 0; } diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 852aa4c92da0..27062303c881 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -208,6 +208,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, static int intel_mid_pci_irq_enable(struct pci_dev *dev) { + struct irq_alloc_info info; int polarity; if (dev->irq_managed && dev->irq > 0) @@ -217,14 +218,13 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) polarity = 0; /* active high */ else polarity = 1; /* active low */ + ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity); /* * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to * IOAPIC RTE entries, so we just enable RTE for the device. */ - if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev))) - return -EBUSY; - if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) + if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info) < 0) return -EBUSY; dev->irq_managed = 1; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 5dc6ca5e1741..9bd115484745 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -146,19 +146,20 @@ static void __init pirq_peer_trick(void) /* * Code for querying and setting of IRQ routes on various interrupt routers. + * PIC Edge/Level Control Registers (ELCR) 0x4d0 & 0x4d1. */ -void eisa_set_level_irq(unsigned int irq) +void elcr_set_level_irq(unsigned int irq) { unsigned char mask = 1 << (irq & 7); unsigned int port = 0x4d0 + (irq >> 3); unsigned char val; - static u16 eisa_irq_mask; + static u16 elcr_irq_mask; - if (irq >= 16 || (1 << irq) & eisa_irq_mask) + if (irq >= 16 || (1 << irq) & elcr_irq_mask) return; - eisa_irq_mask |= (1 << irq); + elcr_irq_mask |= (1 << irq); printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq); val = inb(port); if (!(val & mask)) { @@ -965,11 +966,11 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { msg = "found"; - eisa_set_level_irq(irq); + elcr_set_level_irq(irq); } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { if (r->set(pirq_router_dev, dev, pirq, newirq)) { - eisa_set_level_irq(newirq); + elcr_set_level_irq(newirq); msg = "assigned"; irq = newirq; } diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index a62e0be3a2f1..f1a6c8e86ddd 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,4 +1,5 @@ # Platform specific code goes here +obj-y += atom/ obj-y += ce4100/ obj-y += efi/ obj-y += geode/ diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile new file mode 100644 index 000000000000..0a3a40cbc794 --- /dev/null +++ b/arch/x86/platform/atom/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c new file mode 100644 index 000000000000..5ca8ead91579 --- /dev/null +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -0,0 +1,183 @@ +/* + * Intel SOC Punit device state debug driver + * Punit controls power management for North Complex devices (Graphics + * blocks, Image Signal Processing, video processing, display, DSP etc.) + * + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/iosf_mbi.h> + +/* Side band Interface port */ +#define PUNIT_PORT 0x04 +/* Power gate status reg */ +#define PWRGT_STATUS 0x61 +/* Subsystem config/status Video processor */ +#define VED_SS_PM0 0x32 +/* Subsystem config/status ISP (Image Signal Processor) */ +#define ISP_SS_PM0 0x39 +/* Subsystem config/status Input/output controller */ +#define MIO_SS_PM 0x3B +/* Shift bits for getting status for video, isp and i/o */ +#define SSS_SHIFT 24 +/* Shift bits for getting status for graphics rendering */ +#define RENDER_POS 0 +/* Shift bits for getting status for media control */ +#define MEDIA_POS 2 +/* Shift bits for getting status for Valley View/Baytrail display */ +#define VLV_DISPLAY_POS 6 +/* Subsystem config/status display for Cherry Trail SOC */ +#define CHT_DSP_SSS 0x36 +/* Shift bits for getting status for display */ +#define CHT_DSP_SSS_POS 16 + +struct punit_device { + char *name; + int reg; + int sss_pos; +}; + +static const struct punit_device punit_device_byt[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", PWRGT_STATUS, VLV_DISPLAY_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const struct punit_device punit_device_cht[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", CHT_DSP_SSS, CHT_DSP_SSS_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const char * const dstates[] = {"D0", "D0i1", "D0i2", "D0i3"}; + +static int punit_dev_state_show(struct seq_file *seq_file, void *unused) +{ + u32 punit_pwr_status; + struct punit_device *punit_devp = seq_file->private; + int index; + int status; + + seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n"); + while (punit_devp->name) { + status = iosf_mbi_read(PUNIT_PORT, BT_MBI_PMC_READ, + punit_devp->reg, + &punit_pwr_status); + if (status) { + seq_printf(seq_file, "%9s : Read Failed\n", + punit_devp->name); + } else { + index = (punit_pwr_status >> punit_devp->sss_pos) & 3; + seq_printf(seq_file, "%9s : %s\n", punit_devp->name, + dstates[index]); + } + punit_devp++; + } + + return 0; +} + +static int punit_dev_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, punit_dev_state_show, inode->i_private); +} + +static const struct file_operations punit_dev_state_ops = { + .open = punit_dev_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *punit_dbg_file; + +static int punit_dbgfs_register(struct punit_device *punit_device) +{ + static struct dentry *dev_state; + + punit_dbg_file = debugfs_create_dir("punit_atom", NULL); + if (!punit_dbg_file) + return -ENXIO; + + dev_state = debugfs_create_file("dev_power_state", S_IFREG | S_IRUGO, + punit_dbg_file, punit_device, + &punit_dev_state_ops); + if (!dev_state) { + pr_err("punit_dev_state register failed\n"); + debugfs_remove(punit_dbg_file); + return -ENXIO; + } + + return 0; +} + +static void punit_dbgfs_unregister(void) +{ + debugfs_remove_recursive(punit_dbg_file); +} + +#define ICPU(model, drv_data) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\ + (kernel_ulong_t)&drv_data } + +static const struct x86_cpu_id intel_punit_cpu_ids[] = { + ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */ + ICPU(76, punit_device_cht), /* Braswell, Cherry Trail */ + {} +}; + +MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids); + +static int __init punit_atom_debug_init(void) +{ + const struct x86_cpu_id *id; + int ret; + + id = x86_match_cpu(intel_punit_cpu_ids); + if (!id) + return -ENODEV; + + ret = punit_dbgfs_register((struct punit_device *)id->driver_data); + if (ret < 0) + return ret; + + return 0; +} + +static void __exit punit_atom_debug_exit(void) +{ + punit_dbgfs_unregister(); +} + +module_init(punit_atom_debug_init); +module_exit(punit_atom_debug_exit); + +MODULE_AUTHOR("Kumar P, Mahesh <mahesh.kumar.p@intel.com>"); +MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); +MODULE_DESCRIPTION("Driver for Punit devices states debugging"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c index 0b283d4d0ad7..de734134bc8d 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c @@ -27,6 +27,7 @@ static struct platform_device wdt_dev = { static int tangier_probe(struct platform_device *pdev) { int gsi; + struct irq_alloc_info info; struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; if (!pdata) @@ -34,8 +35,8 @@ static int tangier_probe(struct platform_device *pdev) /* IOAPIC builds identity mapping between GSI and IRQ on MID */ gsi = pdata->irq; - if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) || - mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) { + ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0); + if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) { dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", gsi); return -EINVAL; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 3005f0c89f2e..01d54ea766c1 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -81,26 +81,34 @@ static unsigned long __init intel_mid_calibrate_tsc(void) return 0; } +static void __init intel_mid_setup_bp_timer(void) +{ + apbt_time_init(); + setup_boot_APIC_clock(); +} + static void __init intel_mid_time_init(void) { sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); + switch (intel_mid_timer_options) { case INTEL_MID_TIMER_APBT_ONLY: break; case INTEL_MID_TIMER_LAPIC_APBT: - x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; + /* Use apbt and local apic */ + x86_init.timers.setup_percpu_clockev = intel_mid_setup_bp_timer; x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; - break; + return; default: if (!boot_cpu_has(X86_FEATURE_ARAT)) break; + /* Lapic only, no apbt */ x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; return; } - /* we need at least one APB timer */ - pre_init_apic_IRQ0(); - apbt_time_init(); + + x86_init.timers.setup_percpu_clockev = apbt_time_init; } static void intel_mid_arch_setup(void) diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index c14ad34776c4..ce992e8cc065 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -95,18 +95,16 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table) pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz, irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->freq_hz, pentry->irq); - if (!pentry->irq) - continue; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; -/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ - mp_irq.irqflag = 5; - mp_irq.srcbus = MP_BUS_ISA; - mp_irq.srcbusirq = pentry->irq; /* IRQ */ - mp_irq.dstapic = MP_APIC_ALL; - mp_irq.dstirq = pentry->irq; - mp_save_irq(&mp_irq); - mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ + mp_irq.irqflag = 5; + mp_irq.srcbus = MP_BUS_ISA; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + mp_save_irq(&mp_irq); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL); } return 0; @@ -177,7 +175,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; mp_save_irq(&mp_irq); - mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL); } return 0; } @@ -436,6 +434,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) struct devs_id *dev = NULL; int num, i, ret; int polarity; + struct irq_alloc_info info; sb = (struct sfi_table_simple *)table; num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); @@ -469,9 +468,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) polarity = 1; } - ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE); - if (ret == 0) - ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC); + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, polarity); + ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC, &info); WARN_ON(ret < 0); } diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index 2a8a74f3bd76..6c7111bbd1e9 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -25,8 +25,8 @@ #include <linux/init.h> #include <linux/sfi.h> #include <linux/io.h> -#include <linux/irqdomain.h> +#include <asm/irqdomain.h> #include <asm/io_apic.h> #include <asm/mpspec.h> #include <asm/setup.h> @@ -71,9 +71,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC -static struct irq_domain_ops sfi_ioapic_irqdomain_ops = { - .map = mp_irqdomain_map, -}; static int __init sfi_parse_ioapic(struct sfi_table_header *table) { @@ -82,7 +79,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) int i, num; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_STRICT, - .ops = &sfi_ioapic_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; sb = (struct sfi_table_simple *)table; diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 0ce673645432..8570abe68be1 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -13,22 +13,37 @@ #include <linux/slab.h> #include <linux/irq.h> +#include <asm/irqdomain.h> #include <asm/apic.h> #include <asm/uv/uv_irq.h> #include <asm/uv/uv_hub.h> /* MMR offset and pnode of hub sourcing interrupts for a given irq */ -struct uv_irq_2_mmr_pnode{ - struct rb_node list; +struct uv_irq_2_mmr_pnode { unsigned long offset; int pnode; - int irq; }; -static DEFINE_SPINLOCK(uv_irq_lock); -static struct rb_root uv_irq_root; +static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = cfg->dest_apicid; -static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); + uv_write_global_mmr64(info->pnode, info->offset, mmr_value); +} static void uv_noop(struct irq_data *data) { } @@ -37,6 +52,23 @@ static void uv_ack_apic(struct irq_data *data) ack_APIC_irq(); } +static int +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + uv_program_mmr(cfg, data->chip_data); + send_cleanup_vector(cfg); + } + + return ret; +} + static struct irq_chip uv_irq_chip = { .name = "UV-CORE", .irq_mask = uv_noop, @@ -45,189 +77,99 @@ static struct irq_chip uv_irq_chip = { .irq_set_affinity = uv_set_irq_affinity, }; -/* - * Add offset and pnode information of the hub sourcing interrupts to the - * rb tree for a specific irq. - */ -static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct rb_node **link = &uv_irq_root.rb_node; - struct rb_node *parent = NULL; - struct uv_irq_2_mmr_pnode *n; - struct uv_irq_2_mmr_pnode *e; - unsigned long irqflags; - - n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, - uv_blade_to_memory_nid(blade)); - if (!n) + struct uv_irq_2_mmr_pnode *chip_data; + struct irq_alloc_info *info = arg; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + int ret; + + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV) + return -EINVAL; + + chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL, + irq_data->node); + if (!chip_data) return -ENOMEM; - n->irq = irq; - n->offset = offset; - n->pnode = uv_blade_to_pnode(blade); - spin_lock_irqsave(&uv_irq_lock, irqflags); - /* Find the right place in the rbtree: */ - while (*link) { - parent = *link; - e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); - - if (unlikely(irq == e->irq)) { - /* irq entry exists */ - e->pnode = uv_blade_to_pnode(blade); - e->offset = offset; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - kfree(n); - return 0; - } - - if (irq < e->irq) - link = &(*link)->rb_left; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret >= 0) { + if (info->uv_limit == UV_AFFINITY_CPU) + irq_set_status_flags(virq, IRQ_NO_BALANCING); else - link = &(*link)->rb_right; + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + + chip_data->pnode = uv_blade_to_pnode(info->uv_blade); + chip_data->offset = info->uv_offset; + irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data, + handle_percpu_irq, NULL, info->uv_name); + } else { + kfree(chip_data); } - /* Insert the node into the rbtree. */ - rb_link_node(&n->list, parent, link); - rb_insert_color(&n->list, &uv_irq_root); - - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; + return ret; } -/* Retrieve offset and pnode information from the rb tree for a specific irq */ -int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +static void uv_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - - if (e->irq == irq) { - *offset = e->offset; - *pnode = e->pnode; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; - } - - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return -1; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + BUG_ON(nr_irqs != 1); + kfree(irq_data->chip_data); + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_clear_status_flags(virq, IRQ_NO_BALANCING); + irq_domain_free_irqs_top(domain, virq, nr_irqs); } /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. */ -static int -arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int limit) +static void uv_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode, err; - unsigned int dest; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); - if (err != 0) - return err; - - if (limit == UV_AFFINITY_CPU) - irq_set_status_flags(irq, IRQ_NO_BALANCING); - else - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - - irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); } /* * Disable the specified MMR located on the specified blade so that MSIs are * longer allowed to be sent. */ -static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +static void uv_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) { unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); } -static int -uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest; - unsigned long mmr_value, mmr_offset; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - if (apic_set_affinity(data, mask, &dest)) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +static const struct irq_domain_ops uv_domain_ops = { + .alloc = uv_domain_alloc, + .free = uv_domain_free, + .activate = uv_domain_activate, + .deactivate = uv_domain_deactivate, +}; - if (cfg->move_in_progress) - send_cleanup_vector(cfg); +static struct irq_domain *uv_get_irq_domain(void) +{ + static struct irq_domain *uv_domain; + static DEFINE_MUTEX(uv_lock); + + mutex_lock(&uv_lock); + if (uv_domain == NULL) { + uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL); + if (uv_domain) + uv_domain->parent = x86_vector_domain; + } + mutex_unlock(&uv_lock); - return IRQ_SET_MASK_OK_NOCOPY; + return uv_domain; } /* @@ -238,19 +180,21 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { - int ret, irq = irq_alloc_hwirq(uv_blade_to_memory_nid(mmr_blade)); + struct irq_alloc_info info; + struct irq_domain *domain = uv_get_irq_domain(); - if (!irq) - return -EBUSY; + if (!domain) + return -ENOMEM; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, - limit); - if (ret == irq) - uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); - else - irq_free_hwirq(irq); + init_irq_alloc_info(&info, cpumask_of(cpu)); + info.type = X86_IRQ_ALLOC_TYPE_UV; + info.uv_limit = limit; + info.uv_blade = mmr_blade; + info.uv_offset = mmr_offset; + info.uv_name = irq_name; - return ret; + return irq_domain_alloc_irqs(domain, 1, + uv_blade_to_memory_nid(mmr_blade), &info); } EXPORT_SYMBOL_GPL(uv_setup_irq); @@ -263,26 +207,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); */ void uv_teardown_irq(unsigned int irq) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - if (e->irq == irq) { - arch_disable_uv_irq(e->pnode, e->offset); - rb_erase(n, &uv_irq_root); - kfree(e); - break; - } - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - irq_free_hwirq(irq); + irq_domain_free_irqs(irq, 1); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 3c4469a7a929..e2386cb4e0c3 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -78,9 +78,9 @@ ENTRY(restore_image) /* code below has been relocated to a safe page */ ENTRY(core_restore_code) -loop: +.Lloop: testq %rdx, %rdx - jz done + jz .Ldone /* get addresses from the pbe and copy the page */ movq pbe_address(%rdx), %rsi @@ -91,8 +91,8 @@ loop: /* progress to the next pbe */ movq pbe_next(%rdx), %rdx - jmp loop -done: + jmp .Lloop +.Ldone: /* jump to the restore_registers address from the image header */ jmpq *%rax /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..fe969ac1c65e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1181,10 +1181,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tscp = native_read_tscp, .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, +#else + .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 985fc3ee0973..04529e620559 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,8 @@ #include <asm/percpu.h> #include <asm/processor-flags.h> #include <asm/segment.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> #include <xen/interface/xen.h> @@ -47,29 +49,13 @@ ENTRY(xen_iret) ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) -/* - * sysexit is not used for 64-bit processes, so it's only ever used to - * return to 32-bit compat userspace. - */ -ENTRY(xen_sysexit) - pushq $__USER32_DS - pushq %rcx - pushq $X86_EFLAGS_IF - pushq $__USER32_CS - pushq %rdx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysexit) -RELOC(xen_sysexit, 1b+1) - ENTRY(xen_sysret64) /* * We're already on the usermode stack at this point, but * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -88,7 +74,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..c20fe29e65f4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); +#ifdef CONFIG_X86_32 __visible void xen_sysexit(void); +#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 172a02a6ad14..ba78ccf651e7 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt, return -EINVAL; } +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + return NULL; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ +} + #endif /* _XTENSA_DMA_MAPPING_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 8aaf298a80e1..362905e7c841 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1512,15 +1512,6 @@ config CRYPTO_USER_API_RNG This option enables the user-spaces interface for random number generator algorithms. -config CRYPTO_USER_API_AEAD - tristate "User-space interface for AEAD cipher algorithms" - depends on NET - select CRYPTO_AEAD - select CRYPTO_USER_API - help - This option enables the user-spaces interface for AEAD - cipher algorithms. - config CRYPTO_HASH_INFO bool diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 5bd792c68f9b..ab3bde16ecb4 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -453,7 +453,7 @@ void __iomem *mips_cdmm_early_probe(unsigned int dev_type) /* Look for a specific device type */ for (; drb < bus->drbs; drb += size + 1) { - acsr = readl(cdmm + drb * CDMM_DRB_SIZE); + acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE); type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT; if (type == dev_type) return cdmm + drb * CDMM_DRB_SIZE; @@ -500,7 +500,7 @@ static void mips_cdmm_bus_discover(struct mips_cdmm_bus *bus) bus->discovered = true; pr_info("cdmm%u discovery (%u blocks)\n", cpu, bus->drbs); for (; drb < bus->drbs; drb += size + 1) { - acsr = readl(cdmm + drb * CDMM_DRB_SIZE); + acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE); type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT; size = (acsr & CDMM_ACSR_DEVSIZE) >> CDMM_ACSR_DEVSIZE_SHIFT; rev = (acsr & CDMM_ACSR_DEVREV) >> CDMM_ACSR_DEVREV_SHIFT; diff --git a/drivers/gpio/gpio-kempld.c b/drivers/gpio/gpio-kempld.c index 6b8115f34208..83f281dda1e0 100644 --- a/drivers/gpio/gpio-kempld.c +++ b/drivers/gpio/gpio-kempld.c @@ -117,7 +117,7 @@ static int kempld_gpio_get_direction(struct gpio_chip *chip, unsigned offset) = container_of(chip, struct kempld_gpio_data, chip); struct kempld_device_data *pld = gpio->pld; - return kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset); + return !kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset); } static int kempld_gpio_pincount(struct kempld_device_data *pld) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 59eaa23767d8..6bc612b8a49f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -53,6 +53,11 @@ static DEFINE_MUTEX(gpio_lookup_lock); static LIST_HEAD(gpio_lookup_list); LIST_HEAD(gpio_chips); + +static void gpiochip_free_hogs(struct gpio_chip *chip); +static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); + + static inline void desc_set_label(struct gpio_desc *d, const char *label) { d->label = label; @@ -297,6 +302,7 @@ int gpiochip_add(struct gpio_chip *chip) err_remove_chip: acpi_gpiochip_remove(chip); + gpiochip_free_hogs(chip); of_gpiochip_remove(chip); spin_lock_irqsave(&gpio_lock, flags); list_del(&chip->list); @@ -313,10 +319,6 @@ err_free_descs: } EXPORT_SYMBOL_GPL(gpiochip_add); -/* Forward-declaration */ -static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); -static void gpiochip_free_hogs(struct gpio_chip *chip); - /** * gpiochip_remove() - unregister a gpio_chip * @chip: the chip to unregister diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 40c1db9ad7c3..2f0ed11024eb 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -465,6 +465,9 @@ int drm_plane_helper_commit(struct drm_plane *plane, if (!crtc[i]) continue; + if (crtc[i]->cursor == plane) + continue; + /* There's no other way to figure out whether the crtc is running. */ ret = drm_crtc_vblank_get(crtc[i]); if (ret == 0) { diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 0b5af0fe8659..64f8b2f687d2 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -14,7 +14,7 @@ #define FERMI_TWOD_A 0x0000902d -#define FERMI_MEMORY_TO_MEMORY_FORMAT_A 0x0000903d +#define FERMI_MEMORY_TO_MEMORY_FORMAT_A 0x00009039 #define KEPLER_INLINE_TO_MEMORY_A 0x0000a040 #define KEPLER_INLINE_TO_MEMORY_B 0x0000a140 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c index 2f5eadd12a9b..fdb1dcf16a59 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c @@ -329,7 +329,6 @@ gm204_gr_init(struct nvkm_object *object) nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008); for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - printk(KERN_ERR "ppc %d %d\n", gpc, priv->ppc_nr[gpc]); for (ppc = 0; ppc < priv->ppc_nr[gpc]; ppc++) nv_wr32(priv, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c index e8778c67578e..c61102f70805 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c @@ -90,12 +90,14 @@ gf100_devinit_disable(struct nvkm_devinit *devinit) return disable; } -static int +int gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { + struct nvkm_devinit_impl *impl = (void *)oclass; struct nv50_devinit_priv *priv; + u64 disable; int ret; ret = nvkm_devinit_create(parent, engine, oclass, &priv); @@ -103,7 +105,8 @@ gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine, if (ret) return ret; - if (nv_rd32(priv, 0x022500) & 0x00000001) + disable = impl->disable(&priv->base); + if (disable & (1ULL << NVDEV_ENGINE_DISP)) priv->base.post = true; return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c index b345a53e881d..87ca0ece37b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c @@ -48,7 +48,7 @@ struct nvkm_oclass * gm107_devinit_oclass = &(struct nvkm_devinit_impl) { .base.handle = NV_SUBDEV(DEVINIT, 0x07), .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_devinit_ctor, + .ctor = gf100_devinit_ctor, .dtor = _nvkm_devinit_dtor, .init = nv50_devinit_init, .fini = _nvkm_devinit_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c index 535172c5f1ad..1076fcf0d716 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c @@ -161,7 +161,7 @@ struct nvkm_oclass * gm204_devinit_oclass = &(struct nvkm_devinit_impl) { .base.handle = NV_SUBDEV(DEVINIT, 0x07), .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_devinit_ctor, + .ctor = gf100_devinit_ctor, .dtor = _nvkm_devinit_dtor, .init = nv50_devinit_init, .fini = _nvkm_devinit_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h index b882b65ff3cd..9243521c80ac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h @@ -15,6 +15,9 @@ int nv50_devinit_pll_set(struct nvkm_devinit *, u32, u32); int gt215_devinit_pll_set(struct nvkm_devinit *, u32, u32); +int gf100_devinit_ctor(struct nvkm_object *, struct nvkm_object *, + struct nvkm_oclass *, void *, u32, + struct nvkm_object **); int gf100_devinit_pll_set(struct nvkm_devinit *, u32, u32); u64 gm107_devinit_disable(struct nvkm_devinit *); diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 42b2ea3fdcf3..e597ffc26563 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1798,7 +1798,9 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc) if ((crtc->mode.clock == test_crtc->mode.clock) && (adjusted_clock == test_adjusted_clock) && (radeon_crtc->ss_enabled == test_radeon_crtc->ss_enabled) && - (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID)) + (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) && + (drm_detect_monitor_audio(radeon_connector_edid(test_radeon_crtc->connector)) == + drm_detect_monitor_audio(radeon_connector_edid(radeon_crtc->connector)))) return test_radeon_crtc->pll_id; } } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a0c35bbc8546..ba50f3c1c2e0 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5822,7 +5822,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) L2_CACHE_BIGK_FRAGMENT_SIZE(4)); /* setup context0 */ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 05e6d6ef5963..f848acfd3fc8 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2485,7 +2485,7 @@ static int evergreen_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 0926739c9fa7..9953356fe263 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -400,7 +400,7 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) if (enable) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); - if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + if (connector && drm_detect_monitor_audio(radeon_connector_edid(connector))) { WREG32(HDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, HDMI_AVI_INFO_SEND | /* enable AVI info frames */ HDMI_AVI_INFO_CONT | /* required for audio info values to be updated */ @@ -438,7 +438,8 @@ void evergreen_dp_enable(struct drm_encoder *encoder, bool enable) if (!dig || !dig->afmt) return; - if (enable && drm_detect_monitor_audio(radeon_connector_edid(connector))) { + if (enable && connector && + drm_detect_monitor_audio(radeon_connector_edid(connector))) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct radeon_connector_atom_dig *dig_connector; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index aba2f428c0a8..64d3a771920d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1282,7 +1282,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) L2_CACHE_BIGK_FRAGMENT_SIZE(6)); /* setup context0 */ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 25b4ac967742..8f6d862a1882 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1112,7 +1112,7 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index dcb779647c57..25191f126f3b 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -460,9 +460,6 @@ void radeon_audio_detect(struct drm_connector *connector, if (!connector || !connector->encoder) return; - if (!radeon_encoder_is_digital(connector->encoder)) - return; - rdev = connector->encoder->dev->dev_private; if (!radeon_audio_chipset_supported(rdev)) @@ -471,26 +468,26 @@ void radeon_audio_detect(struct drm_connector *connector, radeon_encoder = to_radeon_encoder(connector->encoder); dig = radeon_encoder->enc_priv; - if (!dig->afmt) - return; - if (status == connector_status_connected) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + struct radeon_connector *radeon_connector; + int sink_type; + + if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_encoder->audio = NULL; + return; + } + + radeon_connector = to_radeon_connector(connector); + sink_type = radeon_dp_getsinktype(radeon_connector); if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - radeon_dp_getsinktype(radeon_connector) == - CONNECTOR_OBJECT_ID_DISPLAYPORT) + sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) radeon_encoder->audio = rdev->audio.dp_funcs; else radeon_encoder->audio = rdev->audio.hdmi_funcs; dig->afmt->pin = radeon_audio_get_pin(connector->encoder); - if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); - } else { - radeon_audio_enable(rdev, dig->afmt->pin, 0); - dig->afmt->pin = NULL; - } + radeon_audio_enable(rdev, dig->afmt->pin, 0xf); } else { radeon_audio_enable(rdev, dig->afmt->pin, 0); dig->afmt->pin = NULL; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index d17d251dbd4f..cebb65e07e1d 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1379,10 +1379,8 @@ out: /* updated in get modes as well since we need to know if it's analog or digital */ radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) { - radeon_connector_get_edid(connector); + if (radeon_audio != 0) radeon_audio_detect(connector, ret); - } exit: pm_runtime_mark_last_busy(connector->dev->dev); @@ -1719,10 +1717,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) { - radeon_connector_get_edid(connector); + if (radeon_audio != 0) radeon_audio_detect(connector, ret); - } out: pm_runtime_mark_last_busy(connector->dev->dev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index c54d6313a46d..01ee96acb398 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -921,7 +921,7 @@ static int rv770_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5326f753e107..4c679b802bc8 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4303,7 +4303,7 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) L2_CACHE_BIGK_FRAGMENT_SIZE(4)); /* setup context0 */ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile index 1055cb79096c..3f4c7b842028 100644 --- a/drivers/gpu/drm/vgem/Makefile +++ b/drivers/gpu/drm/vgem/Makefile @@ -1,4 +1,4 @@ ccflags-y := -Iinclude/drm -vgem-y := vgem_drv.o vgem_dma_buf.o +vgem-y := vgem_drv.o obj-$(CONFIG_DRM_VGEM) += vgem.o diff --git a/drivers/gpu/drm/vgem/vgem_dma_buf.c b/drivers/gpu/drm/vgem/vgem_dma_buf.c deleted file mode 100644 index 0254438ad1a6..000000000000 --- a/drivers/gpu/drm/vgem/vgem_dma_buf.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * Copyright © 2014 The Chromium OS Authors - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Ben Widawsky <ben@bwidawsk.net> - * - */ - -#include <linux/dma-buf.h> -#include "vgem_drv.h" - -struct sg_table *vgem_gem_prime_get_sg_table(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - BUG_ON(obj->pages == NULL); - - return drm_prime_pages_to_sg(obj->pages, obj->base.size / PAGE_SIZE); -} - -int vgem_gem_prime_pin(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - return vgem_gem_get_pages(obj); -} - -void vgem_gem_prime_unpin(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - vgem_gem_put_pages(obj); -} - -void *vgem_gem_prime_vmap(struct drm_gem_object *gobj) -{ - struct drm_vgem_gem_object *obj = to_vgem_bo(gobj); - BUG_ON(obj->pages == NULL); - - return vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL); -} - -void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - vunmap(vaddr); -} - -struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) -{ - struct drm_vgem_gem_object *obj = NULL; - int ret; - - obj = kzalloc(sizeof(*obj), GFP_KERNEL); - if (obj == NULL) { - ret = -ENOMEM; - goto fail; - } - - ret = drm_gem_object_init(dev, &obj->base, dma_buf->size); - if (ret) { - ret = -ENOMEM; - goto fail_free; - } - - get_dma_buf(dma_buf); - - obj->base.dma_buf = dma_buf; - obj->use_dma_buf = true; - - return &obj->base; - -fail_free: - kfree(obj); -fail: - return ERR_PTR(ret); -} diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index cb3b43525b2d..7a207ca547be 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -302,22 +302,13 @@ static const struct file_operations vgem_driver_fops = { }; static struct drm_driver vgem_driver = { - .driver_features = DRIVER_GEM | DRIVER_PRIME, + .driver_features = DRIVER_GEM, .gem_free_object = vgem_gem_free_object, .gem_vm_ops = &vgem_gem_vm_ops, .ioctls = vgem_ioctls, .fops = &vgem_driver_fops, .dumb_create = vgem_gem_dumb_create, .dumb_map_offset = vgem_gem_dumb_map, - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_export = drm_gem_prime_export, - .gem_prime_import = vgem_gem_prime_import, - .gem_prime_pin = vgem_gem_prime_pin, - .gem_prime_unpin = vgem_gem_prime_unpin, - .gem_prime_get_sg_table = vgem_gem_prime_get_sg_table, - .gem_prime_vmap = vgem_gem_prime_vmap, - .gem_prime_vunmap = vgem_gem_prime_vunmap, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h index 57ab4d8f41f9..e9f92f7ee275 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.h +++ b/drivers/gpu/drm/vgem/vgem_drv.h @@ -43,15 +43,4 @@ struct drm_vgem_gem_object { extern void vgem_gem_put_pages(struct drm_vgem_gem_object *obj); extern int vgem_gem_get_pages(struct drm_vgem_gem_object *obj); -/* vgem_dma_buf.c */ -extern struct sg_table *vgem_gem_prime_get_sg_table( - struct drm_gem_object *gobj); -extern int vgem_gem_prime_pin(struct drm_gem_object *gobj); -extern void vgem_gem_prime_unpin(struct drm_gem_object *gobj); -extern void *vgem_gem_prime_vmap(struct drm_gem_object *gobj); -extern void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -extern struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf); - - #endif diff --git a/drivers/hwmon/nct6683.c b/drivers/hwmon/nct6683.c index f3830db02d46..37f01702d081 100644 --- a/drivers/hwmon/nct6683.c +++ b/drivers/hwmon/nct6683.c @@ -439,6 +439,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg, (*t)->dev_attr.attr.name, tg->base + i); if ((*t)->s2) { a2 = &su->u.a2; + sysfs_attr_init(&a2->dev_attr.attr); a2->dev_attr.attr.name = su->name; a2->nr = (*t)->u.s.nr + i; a2->index = (*t)->u.s.index; @@ -449,6 +450,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg, *attrs = &a2->dev_attr.attr; } else { a = &su->u.a1; + sysfs_attr_init(&a->dev_attr.attr); a->dev_attr.attr.name = su->name; a->index = (*t)->u.index + i; a->dev_attr.attr.mode = diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 4fcb48103299..bd1c99deac71 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -995,6 +995,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg, (*t)->dev_attr.attr.name, tg->base + i); if ((*t)->s2) { a2 = &su->u.a2; + sysfs_attr_init(&a2->dev_attr.attr); a2->dev_attr.attr.name = su->name; a2->nr = (*t)->u.s.nr + i; a2->index = (*t)->u.s.index; @@ -1005,6 +1006,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg, *attrs = &a2->dev_attr.attr; } else { a = &su->u.a1; + sysfs_attr_init(&a->dev_attr.attr); a->dev_attr.attr.name = su->name; a->index = (*t)->u.index + i; a->dev_attr.attr.mode = diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 112e4d45e4a0..68800115876b 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -239,8 +239,10 @@ static struct ntc_thermistor_platform_data * ntc_thermistor_parse_dt(struct platform_device *pdev) { struct iio_channel *chan; + enum iio_chan_type type; struct device_node *np = pdev->dev.of_node; struct ntc_thermistor_platform_data *pdata; + int ret; if (!np) return NULL; @@ -253,6 +255,13 @@ ntc_thermistor_parse_dt(struct platform_device *pdev) if (IS_ERR(chan)) return ERR_CAST(chan); + ret = iio_get_channel_type(chan, &type); + if (ret < 0) + return ERR_PTR(ret); + + if (type != IIO_VOLTAGE) + return ERR_PTR(-EINVAL); + if (of_property_read_u32(np, "pullup-uv", &pdata->pullup_uv)) return ERR_PTR(-ENODEV); if (of_property_read_u32(np, "pullup-ohm", &pdata->pullup_ohm)) diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c index 99664ebc738d..ccf4cffe0ee1 100644 --- a/drivers/hwmon/tmp401.c +++ b/drivers/hwmon/tmp401.c @@ -44,7 +44,7 @@ #include <linux/sysfs.h> /* Addresses to scan */ -static const unsigned short normal_i2c[] = { 0x37, 0x48, 0x49, 0x4a, 0x4c, 0x4d, +static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; enum chips { tmp401, tmp411, tmp431, tmp432, tmp435 }; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 327529ee85eb..3f40319a55da 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -547,11 +547,11 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, return 0; err_prot_mr: - ib_dereg_mr(desc->pi_ctx->prot_mr); + ib_dereg_mr(pi_ctx->prot_mr); err_prot_frpl: - ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); + ib_free_fast_reg_page_list(pi_ctx->prot_frpl); err_pi_ctx: - kfree(desc->pi_ctx); + kfree(pi_ctx); return ret; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e43d48956dea..cbe8c1f28a95 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -34,6 +34,7 @@ #include <linux/irq.h> #include <linux/msi.h> #include <linux/dma-contiguous.h> +#include <linux/irqdomain.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> #include <asm/apic.h> @@ -3851,6 +3852,21 @@ union irte { } fields; }; +struct irq_2_irte { + u16 devid; /* Device ID for IRTE table */ + u16 index; /* Index into IRTE table*/ +}; + +struct amd_ir_data { + struct irq_2_irte irq_2_irte; + union irte irte_entry; + union { + struct msi_msg msi_entry; + }; +}; + +static struct irq_chip amd_ir_chip; + #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) #define DTE_IRQ_TABLE_LEN (8ULL << 1) @@ -3944,7 +3960,7 @@ out_unlock: return table; } -static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) +static int alloc_irq_index(u16 devid, int count) { struct irq_remap_table *table; unsigned long flags; @@ -3966,18 +3982,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) c = 0; if (c == count) { - struct irq_2_irte *irte_info; - for (; c != 0; --c) table->table[index - c + 1] = IRTE_ALLOCATED; index -= count - 1; - - cfg->remapped = 1; - irte_info = &cfg->irq_2_irte; - irte_info->devid = devid; - irte_info->index = index; - goto out; } } @@ -3990,22 +3998,6 @@ out: return index; } -static int get_irte(u16 devid, int index, union irte *irte) -{ - struct irq_remap_table *table; - unsigned long flags; - - table = get_irq_table(devid, false); - if (!table) - return -ENOMEM; - - spin_lock_irqsave(&table->lock, flags); - irte->val = table->table[index]; - spin_unlock_irqrestore(&table->lock, flags); - - return 0; -} - static int modify_irte(u16 devid, int index, union irte irte) { struct irq_remap_table *table; @@ -4052,243 +4044,316 @@ static void free_irte(u16 devid, int index) iommu_completion_wait(iommu); } -static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) +static int get_devid(struct irq_alloc_info *info) { - struct irq_remap_table *table; - struct irq_2_irte *irte_info; - struct irq_cfg *cfg; - union irte irte; - int ioapic_id; - int index; - int devid; - int ret; - - cfg = irq_cfg(irq); - if (!cfg) - return -EINVAL; - - irte_info = &cfg->irq_2_irte; - ioapic_id = mpc_ioapic_id(attr->ioapic); - devid = get_ioapic_devid(ioapic_id); - - if (devid < 0) - return devid; - - table = get_irq_table(devid, true); - if (table == NULL) - return -ENOMEM; - - index = attr->ioapic_pin; + int devid = -1; - /* Setup IRQ remapping info */ - cfg->remapped = 1; - irte_info->devid = devid; - irte_info->index = index; + switch (info->type) { + case X86_IRQ_ALLOC_TYPE_IOAPIC: + devid = get_ioapic_devid(info->ioapic_id); + break; + case X86_IRQ_ALLOC_TYPE_HPET: + devid = get_hpet_devid(info->hpet_id); + break; + case X86_IRQ_ALLOC_TYPE_MSI: + case X86_IRQ_ALLOC_TYPE_MSIX: + devid = get_device_id(&info->msi_dev->dev); + break; + default: + BUG_ON(1); + break; + } - /* Setup IRTE for IOMMU */ - irte.val = 0; - irte.fields.vector = vector; - irte.fields.int_type = apic->irq_delivery_mode; - irte.fields.destination = destination; - irte.fields.dm = apic->irq_dest_mode; - irte.fields.valid = 1; - - ret = modify_irte(devid, index, irte); - if (ret) - return ret; + return devid; +} - /* Setup IOAPIC entry */ - memset(entry, 0, sizeof(*entry)); +static struct irq_domain *get_ir_irq_domain(struct irq_alloc_info *info) +{ + struct amd_iommu *iommu; + int devid; - entry->vector = index; - entry->mask = 0; - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; + if (!info) + return NULL; - /* - * Mask level triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; + devid = get_devid(info); + if (devid >= 0) { + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + return iommu->ir_domain; + } - return 0; + return NULL; } -static int set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) +static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) { - struct irq_2_irte *irte_info; - unsigned int dest, irq; - struct irq_cfg *cfg; - union irte irte; - int err; - - if (!config_enabled(CONFIG_SMP)) - return -1; - - cfg = irqd_cfg(data); - irq = data->irq; - irte_info = &cfg->irq_2_irte; + struct amd_iommu *iommu; + int devid; - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; + if (!info) + return NULL; - if (get_irte(irte_info->devid, irte_info->index, &irte)) - return -EBUSY; + switch (info->type) { + case X86_IRQ_ALLOC_TYPE_MSI: + case X86_IRQ_ALLOC_TYPE_MSIX: + devid = get_device_id(&info->msi_dev->dev); + if (devid >= 0) { + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + return iommu->msi_domain; + } + break; + default: + break; + } - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; + return NULL; +} - err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); - if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) - pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq); - return err; - } +struct irq_remap_ops amd_iommu_irq_ops = { + .prepare = amd_iommu_prepare, + .enable = amd_iommu_enable, + .disable = amd_iommu_disable, + .reenable = amd_iommu_reenable, + .enable_faulting = amd_iommu_enable_faulting, + .get_ir_irq_domain = get_ir_irq_domain, + .get_irq_domain = get_irq_domain, +}; - irte.fields.vector = cfg->vector; - irte.fields.destination = dest; +static void irq_remapping_prepare_irte(struct amd_ir_data *data, + struct irq_cfg *irq_cfg, + struct irq_alloc_info *info, + int devid, int index, int sub_handle) +{ + struct irq_2_irte *irte_info = &data->irq_2_irte; + struct msi_msg *msg = &data->msi_entry; + union irte *irte = &data->irte_entry; + struct IO_APIC_route_entry *entry; - modify_irte(irte_info->devid, irte_info->index, irte); + data->irq_2_irte.devid = devid; + data->irq_2_irte.index = index + sub_handle; - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + /* Setup IRTE for IOMMU */ + irte->val = 0; + irte->fields.vector = irq_cfg->vector; + irte->fields.int_type = apic->irq_delivery_mode; + irte->fields.destination = irq_cfg->dest_apicid; + irte->fields.dm = apic->irq_dest_mode; + irte->fields.valid = 1; + + switch (info->type) { + case X86_IRQ_ALLOC_TYPE_IOAPIC: + /* Setup IOAPIC entry */ + entry = info->ioapic_entry; + info->ioapic_entry = NULL; + memset(entry, 0, sizeof(*entry)); + entry->vector = index; + entry->mask = 0; + entry->trigger = info->ioapic_trigger; + entry->polarity = info->ioapic_polarity; + /* Mask level triggered irqs. */ + if (info->ioapic_trigger) + entry->mask = 1; + break; - cpumask_copy(data->affinity, mask); + case X86_IRQ_ALLOC_TYPE_HPET: + case X86_IRQ_ALLOC_TYPE_MSI: + case X86_IRQ_ALLOC_TYPE_MSIX: + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = irte_info->index; + break; - return 0; + default: + BUG_ON(1); + break; + } } -static int free_irq(int irq) +static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct irq_2_irte *irte_info; + struct irq_alloc_info *info = arg; + struct irq_data *irq_data; + struct amd_ir_data *data; struct irq_cfg *cfg; + int i, ret, devid; + int index = -1; - cfg = irq_cfg(irq); - if (!cfg) + if (!info) + return -EINVAL; + if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI && + info->type != X86_IRQ_ALLOC_TYPE_MSIX) return -EINVAL; - irte_info = &cfg->irq_2_irte; - - free_irte(irte_info->devid, irte_info->index); + /* + * With IRQ remapping enabled, don't need contiguous CPU vectors + * to support multiple MSI interrupts. + */ + if (info->type == X86_IRQ_ALLOC_TYPE_MSI) + info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - return 0; -} + devid = get_devid(info); + if (devid < 0) + return -EINVAL; -static void compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_2_irte *irte_info; - struct irq_cfg *cfg; - union irte irte; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; - cfg = irq_cfg(irq); - if (!cfg) - return; + ret = -ENOMEM; + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out_free_parent; - irte_info = &cfg->irq_2_irte; + if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { + if (get_irq_table(devid, true)) + index = info->ioapic_pin; + else + ret = -ENOMEM; + } else { + index = alloc_irq_index(devid, nr_irqs); + } + if (index < 0) { + pr_warn("Failed to allocate IRTE\n"); + kfree(data); + goto out_free_parent; + } - irte.val = 0; - irte.fields.vector = cfg->vector; - irte.fields.int_type = apic->irq_delivery_mode; - irte.fields.destination = dest; - irte.fields.dm = apic->irq_dest_mode; - irte.fields.valid = 1; + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + cfg = irqd_cfg(irq_data); + if (!irq_data || !cfg) { + ret = -EINVAL; + goto out_free_data; + } - modify_irte(irte_info->devid, irte_info->index, irte); + if (i > 0) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out_free_data; + } + irq_data->hwirq = (devid << 16) + i; + irq_data->chip_data = data; + irq_data->chip = &amd_ir_chip; + irq_remapping_prepare_irte(data, cfg, info, devid, index, i); + irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT); + } + return 0; - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = MSI_ADDR_BASE_LO; - msg->data = irte_info->index; +out_free_data: + for (i--; i >= 0; i--) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + if (irq_data) + kfree(irq_data->chip_data); + } + for (i = 0; i < nr_irqs; i++) + free_irte(devid, index + i); +out_free_parent: + irq_domain_free_irqs_common(domain, virq, nr_irqs); + return ret; } -static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) +static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - struct irq_cfg *cfg; - int index; - u16 devid; - - if (!pdev) - return -EINVAL; + struct irq_2_irte *irte_info; + struct irq_data *irq_data; + struct amd_ir_data *data; + int i; - cfg = irq_cfg(irq); - if (!cfg) - return -EINVAL; + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + if (irq_data && irq_data->chip_data) { + data = irq_data->chip_data; + irte_info = &data->irq_2_irte; + free_irte(irte_info->devid, irte_info->index); + kfree(data); + } + } + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} - devid = get_device_id(&pdev->dev); - index = alloc_irq_index(cfg, devid, nvec); +static void irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct amd_ir_data *data = irq_data->chip_data; + struct irq_2_irte *irte_info = &data->irq_2_irte; - return index < 0 ? MAX_IRQS_PER_TABLE : index; + modify_irte(irte_info->devid, irte_info->index, data->irte_entry); } -static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, - int index, int offset) +static void irq_remapping_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) { - struct irq_2_irte *irte_info; - struct irq_cfg *cfg; - u16 devid; + struct amd_ir_data *data = irq_data->chip_data; + struct irq_2_irte *irte_info = &data->irq_2_irte; + union irte entry; - if (!pdev) - return -EINVAL; + entry.val = 0; + modify_irte(irte_info->devid, irte_info->index, data->irte_entry); +} - cfg = irq_cfg(irq); - if (!cfg) - return -EINVAL; +static struct irq_domain_ops amd_ir_domain_ops = { + .alloc = irq_remapping_alloc, + .free = irq_remapping_free, + .activate = irq_remapping_activate, + .deactivate = irq_remapping_deactivate, +}; - if (index >= MAX_IRQS_PER_TABLE) - return 0; +static int amd_ir_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct amd_ir_data *ir_data = data->chip_data; + struct irq_2_irte *irte_info = &ir_data->irq_2_irte; + struct irq_cfg *cfg = irqd_cfg(data); + struct irq_data *parent = data->parent_data; + int ret; - devid = get_device_id(&pdev->dev); - irte_info = &cfg->irq_2_irte; + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; - cfg->remapped = 1; - irte_info->devid = devid; - irte_info->index = index + offset; + /* + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. + */ + ir_data->irte_entry.fields.vector = cfg->vector; + ir_data->irte_entry.fields.destination = cfg->dest_apicid; + modify_irte(irte_info->devid, irte_info->index, ir_data->irte_entry); - return 0; + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + send_cleanup_vector(cfg); + + return IRQ_SET_MASK_OK_DONE; } -static int alloc_hpet_msi(unsigned int irq, unsigned int id) +static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) { - struct irq_2_irte *irte_info; - struct irq_cfg *cfg; - int index, devid; + struct amd_ir_data *ir_data = irq_data->chip_data; - cfg = irq_cfg(irq); - if (!cfg) - return -EINVAL; + *msg = ir_data->msi_entry; +} - irte_info = &cfg->irq_2_irte; - devid = get_hpet_devid(id); - if (devid < 0) - return devid; +static struct irq_chip amd_ir_chip = { + .irq_ack = ir_ack_apic_edge, + .irq_set_affinity = amd_ir_set_affinity, + .irq_compose_msi_msg = ir_compose_msi_msg, +}; - index = alloc_irq_index(cfg, devid, 1); - if (index < 0) - return index; +int amd_iommu_create_irq_domain(struct amd_iommu *iommu) +{ + iommu->ir_domain = irq_domain_add_tree(NULL, &amd_ir_domain_ops, iommu); + if (!iommu->ir_domain) + return -ENOMEM; - cfg->remapped = 1; - irte_info->devid = devid; - irte_info->index = index; + iommu->ir_domain->parent = arch_get_ir_parent_domain(); + iommu->msi_domain = arch_create_msi_irq_domain(iommu->ir_domain); return 0; } - -struct irq_remap_ops amd_iommu_irq_ops = { - .prepare = amd_iommu_prepare, - .enable = amd_iommu_enable, - .disable = amd_iommu_disable, - .reenable = amd_iommu_reenable, - .enable_faulting = amd_iommu_enable_faulting, - .setup_ioapic_entry = setup_ioapic_entry, - .set_affinity = set_affinity, - .free_irq = free_irq, - .compose_msi_msg = compose_msi_msg, - .msi_alloc_irq = msi_alloc_irq, - .msi_setup_irq = msi_setup_irq, - .alloc_hpet_msi = alloc_hpet_msi, -}; #endif diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 450ef5001a65..c17df04d7a7f 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1124,6 +1124,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (ret) return ret; + ret = amd_iommu_create_irq_domain(iommu); + if (ret) + return ret; + /* * Make sure IOMMU is not considered to translate itself. The IVRS * table tells us so, but this is a lie! diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 72b0fd455e24..0a21142d3639 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -62,6 +62,15 @@ extern u8 amd_iommu_pc_get_max_counters(u16 devid); extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write); +#ifdef CONFIG_IRQ_REMAP +extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); +#else +static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) +{ + return 0; +} +#endif + #define PPR_SUCCESS 0x0 #define PPR_INVALID 0x1 #define PPR_FAILURE 0xf diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 05030e523771..6533e874c9d7 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -398,6 +398,7 @@ struct amd_iommu_fault { struct iommu_domain; +struct irq_domain; /* * This structure contains generic data for IOMMU protection domains @@ -579,6 +580,10 @@ struct amd_iommu { /* The maximum PC banks and counters/bank (PCSup=1) */ u8 max_banks; u8 max_counters; +#ifdef CONFIG_IRQ_REMAP + struct irq_domain *ir_domain; + struct irq_domain *msi_domain; +#endif }; struct devid_map { diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9847613085e1..536f2d8ea41a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1087,8 +1087,8 @@ static void free_iommu(struct intel_iommu *iommu) if (iommu->irq) { free_irq(iommu->irq, iommu); - irq_set_handler_data(iommu->irq, NULL); dmar_free_hwirq(iommu->irq); + iommu->irq = 0; } if (iommu->qi) { @@ -1642,23 +1642,14 @@ int dmar_set_interrupt(struct intel_iommu *iommu) if (iommu->irq) return 0; - irq = dmar_alloc_hwirq(); - if (irq <= 0) { + irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu); + if (irq > 0) { + iommu->irq = irq; + } else { pr_err("IOMMU: no free vectors\n"); return -EINVAL; } - irq_set_handler_data(irq, iommu); - iommu->irq = irq; - - ret = arch_setup_dmar_msi(irq); - if (ret) { - irq_set_handler_data(irq, NULL); - iommu->irq = 0; - dmar_free_hwirq(irq); - return ret; - } - ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); if (ret) pr_err("IOMMU: can't request irq\n"); diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 5709ae9c3e77..8fad71cc27e7 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -8,6 +8,7 @@ #include <linux/irq.h> #include <linux/intel-iommu.h> #include <linux/acpi.h> +#include <linux/irqdomain.h> #include <asm/io_apic.h> #include <asm/smp.h> #include <asm/cpu.h> @@ -31,6 +32,21 @@ struct hpet_scope { unsigned int devfn; }; +struct irq_2_iommu { + struct intel_iommu *iommu; + u16 irte_index; + u16 sub_handle; + u8 irte_mask; +}; + +struct intel_ir_data { + struct irq_2_iommu irq_2_iommu; + struct irte irte_entry; + union { + struct msi_msg msi_entry; + }; +}; + #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) #define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8) @@ -50,43 +66,14 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS]; * the dmar_global_lock. */ static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); +static struct irq_domain_ops intel_ir_domain_ops; static int __init parse_ioapics_under_ir(void); -static struct irq_2_iommu *irq_2_iommu(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - return cfg ? &cfg->irq_2_iommu : NULL; -} - -static int get_irte(int irq, struct irte *entry) -{ - struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - unsigned long flags; - int index; - - if (!entry || !irq_iommu) - return -1; - - raw_spin_lock_irqsave(&irq_2_ir_lock, flags); - - if (unlikely(!irq_iommu->iommu)) { - raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); - return -1; - } - - index = irq_iommu->irte_index + irq_iommu->sub_handle; - *entry = *(irq_iommu->iommu->ir_table->base + index); - - raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); - return 0; -} - -static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +static int alloc_irte(struct intel_iommu *iommu, int irq, + struct irq_2_iommu *irq_iommu, u16 count) { struct ir_table *table = iommu->ir_table; - struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - struct irq_cfg *cfg = irq_cfg(irq); unsigned int mask = 0; unsigned long flags; int index; @@ -113,7 +100,6 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) if (index < 0) { pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id); } else { - cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -135,47 +121,9 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) return qi_submit_sync(&desc, iommu); } -static int map_irq_to_irte_handle(int irq, u16 *sub_handle) +static int modify_irte(struct irq_2_iommu *irq_iommu, + struct irte *irte_modified) { - struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - unsigned long flags; - int index; - - if (!irq_iommu) - return -1; - - raw_spin_lock_irqsave(&irq_2_ir_lock, flags); - *sub_handle = irq_iommu->sub_handle; - index = irq_iommu->irte_index; - raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); - return index; -} - -static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) -{ - struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long flags; - - if (!irq_iommu) - return -1; - - raw_spin_lock_irqsave(&irq_2_ir_lock, flags); - - cfg->remapped = 1; - irq_iommu->iommu = iommu; - irq_iommu->irte_index = index; - irq_iommu->sub_handle = subhandle; - irq_iommu->irte_mask = 0; - - raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); - - return 0; -} - -static int modify_irte(int irq, struct irte *irte_modified) -{ - struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); struct intel_iommu *iommu; unsigned long flags; struct irte *irte; @@ -242,7 +190,7 @@ static int clear_entries(struct irq_2_iommu *irq_iommu) return 0; iommu = irq_iommu->iommu; - index = irq_iommu->irte_index + irq_iommu->sub_handle; + index = irq_iommu->irte_index; start = iommu->ir_table->base + index; end = start + (1 << irq_iommu->irte_mask); @@ -257,29 +205,6 @@ static int clear_entries(struct irq_2_iommu *irq_iommu) return qi_flush_iec(iommu, index, irq_iommu->irte_mask); } -static int free_irte(int irq) -{ - struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - unsigned long flags; - int rc; - - if (!irq_iommu) - return -1; - - raw_spin_lock_irqsave(&irq_2_ir_lock, flags); - - rc = clear_entries(irq_iommu); - - irq_iommu->iommu = NULL; - irq_iommu->irte_index = 0; - irq_iommu->sub_handle = 0; - irq_iommu->irte_mask = 0; - - raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); - - return rc; -} - /* * source validation type */ @@ -488,7 +413,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); - if (!pages) { pr_err("IR%d: failed to allocate pages of order %d\n", iommu->seq_id, INTR_REMAP_PAGE_ORDER); @@ -502,11 +426,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) goto out_free_pages; } + iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(), + 0, INTR_REMAP_TABLE_ENTRIES, + NULL, &intel_ir_domain_ops, + iommu); + if (!iommu->ir_domain) { + pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); + goto out_free_bitmap; + } + iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain); + ir_table->base = page_address(pages); ir_table->bitmap = bitmap; iommu->ir_table = ir_table; return 0; +out_free_bitmap: + kfree(bitmap); out_free_pages: __free_pages(pages, INTR_REMAP_PAGE_ORDER); out_free_table: @@ -517,6 +453,14 @@ out_free_table: static void intel_teardown_irq_remapping(struct intel_iommu *iommu) { if (iommu && iommu->ir_table) { + if (iommu->ir_msi_domain) { + irq_domain_remove(iommu->ir_msi_domain); + iommu->ir_msi_domain = NULL; + } + if (iommu->ir_domain) { + irq_domain_remove(iommu->ir_domain); + iommu->ir_domain = NULL; + } free_pages((unsigned long)iommu->ir_table->base, INTR_REMAP_PAGE_ORDER); kfree(iommu->ir_table->bitmap); @@ -702,13 +646,6 @@ static int __init intel_enable_irq_remapping(void) irq_remapping_enabled = 1; - /* - * VT-d has a different layout for IO-APIC entries when - * interrupt remapping is enabled. So it needs a special routine - * to print IO-APIC entries for debugging purposes too. - */ - x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries; - pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; @@ -945,8 +882,7 @@ error: return -1; } -static void prepare_irte(struct irte *irte, int vector, - unsigned int dest) +static void prepare_irte(struct irte *irte, int vector, unsigned int dest) { memset(irte, 0, sizeof(*irte)); @@ -966,76 +902,63 @@ static void prepare_irte(struct irte *irte, int vector, irte->redir_hint = 1; } -static int intel_setup_ioapic_entry(int irq, - struct IO_APIC_route_entry *route_entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) +static struct irq_domain *intel_get_ir_irq_domain(struct irq_alloc_info *info) { - int ioapic_id = mpc_ioapic_id(attr->ioapic); - struct intel_iommu *iommu; - struct IR_IO_APIC_route_entry *entry; - struct irte irte; - int index; - - down_read(&dmar_global_lock); - iommu = map_ioapic_to_ir(ioapic_id); - if (!iommu) { - pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); - index = -ENODEV; - } else { - index = alloc_irte(iommu, irq, 1); - if (index < 0) { - pr_warn("Failed to allocate IRTE for ioapic %d\n", - ioapic_id); - index = -ENOMEM; - } - } - up_read(&dmar_global_lock); - if (index < 0) - return index; - - prepare_irte(&irte, vector, destination); + struct intel_iommu *iommu = NULL; - /* Set source-id of interrupt request */ - set_ioapic_sid(&irte, ioapic_id); + if (!info) + return NULL; - modify_irte(irq, &irte); + switch (info->type) { + case X86_IRQ_ALLOC_TYPE_IOAPIC: + iommu = map_ioapic_to_ir(info->ioapic_id); + break; + case X86_IRQ_ALLOC_TYPE_HPET: + iommu = map_hpet_to_ir(info->hpet_id); + break; + case X86_IRQ_ALLOC_TYPE_MSI: + case X86_IRQ_ALLOC_TYPE_MSIX: + iommu = map_dev_to_ir(info->msi_dev); + break; + default: + BUG_ON(1); + break; + } - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " - "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " - "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " - "Avail:%X Vector:%02X Dest:%08X " - "SID:%04X SQ:%X SVT:%X)\n", - attr->ioapic, irte.present, irte.fpd, irte.dst_mode, - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, - irte.avail, irte.vector, irte.dest_id, - irte.sid, irte.sq, irte.svt); + return iommu ? iommu->ir_domain : NULL; +} - entry = (struct IR_IO_APIC_route_entry *)route_entry; - memset(entry, 0, sizeof(*entry)); +static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info) +{ + struct intel_iommu *iommu; - entry->index2 = (index >> 15) & 0x1; - entry->zero = 0; - entry->format = 1; - entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. - */ - entry->vector = attr->ioapic_pin; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; + if (!info) + return NULL; - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; + switch (info->type) { + case X86_IRQ_ALLOC_TYPE_MSI: + case X86_IRQ_ALLOC_TYPE_MSIX: + iommu = map_dev_to_ir(info->msi_dev); + if (iommu) + return iommu->ir_msi_domain; + break; + default: + break; + } - return 0; + return NULL; } +struct irq_remap_ops intel_irq_remap_ops = { + .prepare = intel_prepare_irq_remapping, + .enable = intel_enable_irq_remapping, + .disable = disable_irq_remapping, + .reenable = reenable_irq_remapping, + .enable_faulting = enable_drhd_fault_handling, + .get_ir_irq_domain = intel_get_ir_irq_domain, + .get_irq_domain = intel_get_irq_domain, +}; + /* * Migrate the IO-APIC irq in the presence of intr-remapping. * @@ -1051,170 +974,242 @@ static int intel_setup_ioapic_entry(int irq, * is used to migrate MSI irq's in the presence of interrupt-remapping. */ static int -intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) +intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) { + struct intel_ir_data *ir_data = data->chip_data; + struct irte *irte = &ir_data->irte_entry; struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest, irq = data->irq; - struct irte irte; - int err; - - if (!config_enabled(CONFIG_SMP)) - return -EINVAL; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - if (get_irte(irq, &irte)) - return -EBUSY; - - err = assign_irq_vector(irq, cfg, mask); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); - if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } + struct irq_data *parent = data->parent_data; + int ret; - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; /* * Atomically updates the IRTE with the new destination, vector * and flushes the interrupt entry cache. */ - modify_irte(irq, &irte); + irte->vector = cfg->vector; + irte->dest_id = IRTE_DEST(cfg->dest_apicid); + modify_irte(&ir_data->irq_2_iommu, irte); /* * After this point, all the interrupts will start arriving * at the new destination. So, time to cleanup the previous * vector allocation. */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + send_cleanup_vector(cfg); - cpumask_copy(data->affinity, mask); - return 0; + return IRQ_SET_MASK_OK_DONE; } -static void intel_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) +static void intel_ir_compose_msi_msg(struct irq_data *irq_data, + struct msi_msg *msg) { - struct irq_cfg *cfg; - struct irte irte; - u16 sub_handle = 0; - int ir_index; - - cfg = irq_cfg(irq); + struct intel_ir_data *ir_data = irq_data->chip_data; - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - prepare_irte(&irte, cfg->vector, dest); - - /* Set source-id of interrupt request */ - if (pdev) - set_msi_sid(&irte, pdev); - else - set_hpet_sid(&irte, hpet_id); + *msg = ir_data->msi_entry; +} - modify_irte(irq, &irte); +static struct irq_chip intel_ir_chip = { + .irq_ack = ir_ack_apic_edge, + .irq_set_affinity = intel_ir_set_affinity, + .irq_compose_msi_msg = intel_ir_compose_msi_msg, +}; - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); +static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, + struct irq_cfg *irq_cfg, + struct irq_alloc_info *info, + int index, int sub_handle) +{ + struct IR_IO_APIC_route_entry *entry; + struct irte *irte = &data->irte_entry; + struct msi_msg *msg = &data->msi_entry; + + prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid); + switch (info->type) { + case X86_IRQ_ALLOC_TYPE_IOAPIC: + /* Set source-id of interrupt request */ + set_ioapic_sid(irte, info->ioapic_id); + apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n", + info->ioapic_id, irte->present, irte->fpd, + irte->dst_mode, irte->redir_hint, + irte->trigger_mode, irte->dlvry_mode, + irte->avail, irte->vector, irte->dest_id, + irte->sid, irte->sq, irte->svt); + + entry = (struct IR_IO_APIC_route_entry *)info->ioapic_entry; + info->ioapic_entry = NULL; + memset(entry, 0, sizeof(*entry)); + entry->index2 = (index >> 15) & 0x1; + entry->zero = 0; + entry->format = 1; + entry->index = (index & 0x7fff); + /* + * IO-APIC RTE will be configured with virtual vector. + * irq handler will do the explicit EOI to the io-apic. + */ + entry->vector = info->ioapic_pin; + entry->mask = 0; /* enable IRQ */ + entry->trigger = info->ioapic_trigger; + entry->polarity = info->ioapic_polarity; + if (info->ioapic_trigger) + entry->mask = 1; /* Mask level triggered irqs. */ + break; + + case X86_IRQ_ALLOC_TYPE_HPET: + case X86_IRQ_ALLOC_TYPE_MSI: + case X86_IRQ_ALLOC_TYPE_MSIX: + if (info->type == X86_IRQ_ALLOC_TYPE_HPET) + set_hpet_sid(irte, info->hpet_id); + else + set_msi_sid(irte, info->msi_dev); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(index) | + MSI_ADDR_IR_INDEX2(index); + break; + + default: + BUG_ON(1); + break; + } } -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec) +static void intel_free_irq_resources(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) { - struct intel_iommu *iommu; - int index; + struct irq_data *irq_data; + struct intel_ir_data *data; + struct irq_2_iommu *irq_iommu; + unsigned long flags; + int i; - down_read(&dmar_global_lock); - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - index = -ENOENT; - } else { - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", - nvec, pci_name(dev)); - index = -ENOSPC; + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + if (irq_data && irq_data->chip_data) { + data = irq_data->chip_data; + irq_iommu = &data->irq_2_iommu; + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + clear_entries(irq_iommu); + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + irq_domain_reset_irq_data(irq_data); + kfree(data); } } - up_read(&dmar_global_lock); - - return index; } -static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) +static int intel_irq_remapping_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *arg) { - struct intel_iommu *iommu; - int ret = -ENOENT; + struct intel_iommu *iommu = domain->host_data; + struct irq_alloc_info *info = arg; + struct intel_ir_data *data, *ird; + struct irq_data *irq_data; + struct irq_cfg *irq_cfg; + int i, ret, index; + + if (!info || !iommu) + return -EINVAL; + if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI && + info->type != X86_IRQ_ALLOC_TYPE_MSIX) + return -EINVAL; + + /* + * With IRQ remapping enabled, don't need contiguous CPU vectors + * to support multiple MSI interrupts. + */ + if (info->type == X86_IRQ_ALLOC_TYPE_MSI) + info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + + ret = -ENOMEM; + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out_free_parent; down_read(&dmar_global_lock); - iommu = map_dev_to_ir(pdev); - if (iommu) { - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); - ret = 0; - } + index = alloc_irte(iommu, virq, &data->irq_2_iommu, nr_irqs); up_read(&dmar_global_lock); + if (index < 0) { + pr_warn("Failed to allocate IRTE\n"); + kfree(data); + goto out_free_parent; + } + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + irq_cfg = irqd_cfg(irq_data); + if (!irq_data || !irq_cfg) { + ret = -EINVAL; + goto out_free_data; + } + + if (i > 0) { + ird = kzalloc(sizeof(*ird), GFP_KERNEL); + if (!ird) + goto out_free_data; + /* Initialize the common data */ + ird->irq_2_iommu = data->irq_2_iommu; + ird->irq_2_iommu.sub_handle = i; + } else { + ird = data; + } + + irq_data->hwirq = (index << 16) + i; + irq_data->chip_data = ird; + irq_data->chip = &intel_ir_chip; + intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i); + irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT); + } + return 0; + +out_free_data: + intel_free_irq_resources(domain, virq, i); +out_free_parent: + irq_domain_free_irqs_common(domain, virq, nr_irqs); return ret; } -static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id) +static void intel_irq_remapping_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) { - int ret = -1; - struct intel_iommu *iommu; - int index; + intel_free_irq_resources(domain, virq, nr_irqs); + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} - down_read(&dmar_global_lock); - iommu = map_hpet_to_ir(id); - if (iommu) { - index = alloc_irte(iommu, irq, 1); - if (index >= 0) - ret = 0; - } - up_read(&dmar_global_lock); +static void intel_irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct intel_ir_data *data = irq_data->chip_data; - return ret; + modify_irte(&data->irq_2_iommu, &data->irte_entry); } -struct irq_remap_ops intel_irq_remap_ops = { - .prepare = intel_prepare_irq_remapping, - .enable = intel_enable_irq_remapping, - .disable = disable_irq_remapping, - .reenable = reenable_irq_remapping, - .enable_faulting = enable_drhd_fault_handling, - .setup_ioapic_entry = intel_setup_ioapic_entry, - .set_affinity = intel_ioapic_set_affinity, - .free_irq = free_irte, - .compose_msi_msg = intel_compose_msi_msg, - .msi_alloc_irq = intel_msi_alloc_irq, - .msi_setup_irq = intel_msi_setup_irq, - .alloc_hpet_msi = intel_alloc_hpet_msi, +static void intel_irq_remapping_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct intel_ir_data *data = irq_data->chip_data; + struct irte entry; + + memset(&entry, 0, sizeof(entry)); + modify_irte(&data->irq_2_iommu, &entry); +} + +static struct irq_domain_ops intel_ir_domain_ops = { + .alloc = intel_irq_remapping_alloc, + .free = intel_irq_remapping_free, + .activate = intel_irq_remapping_activate, + .deactivate = intel_irq_remapping_deactivate, }; /* diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 390079ee1350..fc78b0d41f71 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -6,6 +6,7 @@ #include <linux/msi.h> #include <linux/irq.h> #include <linux/pci.h> +#include <linux/irqdomain.h> #include <asm/hw_irq.h> #include <asm/irq_remapping.h> @@ -24,18 +25,6 @@ int no_x2apic_optout; static int disable_irq_remap; static struct irq_remap_ops *remap_ops; -static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); -static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle); -static int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force); - -static bool irq_remapped(struct irq_cfg *cfg) -{ - return (cfg->remapped == 1); -} - static void irq_remapping_disable_io_apic(void) { /* @@ -49,117 +38,9 @@ static void irq_remapping_disable_io_apic(void) disconnect_bsp_APIC(0); } -static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) -{ - int ret, sub_handle, nvec_pow2, index = 0; - unsigned int irq; - struct msi_desc *msidesc; - - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - - irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev)); - if (irq == 0) - return -ENOSPC; - - nvec_pow2 = __roundup_pow_of_two(nvec); - for (sub_handle = 0; sub_handle < nvec; sub_handle++) { - if (!sub_handle) { - index = msi_alloc_remapped_irq(dev, irq, nvec_pow2); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq + sub_handle, - index, sub_handle); - if (ret < 0) - goto error; - } - ret = setup_msi_irq(dev, msidesc, irq, sub_handle); - if (ret < 0) - goto error; - } - return 0; - -error: - irq_free_hwirqs(irq, nvec); - - /* - * Restore altered MSI descriptor fields and prevent just destroyed - * IRQs from tearing down again in default_teardown_msi_irqs() - */ - msidesc->irq = 0; - - return ret; -} - -static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) -{ - int node, ret, sub_handle, index = 0; - struct msi_desc *msidesc; - unsigned int irq; - - node = dev_to_node(&dev->dev); - sub_handle = 0; - - list_for_each_entry(msidesc, &dev->msi_list, list) { - - irq = irq_alloc_hwirq(node); - if (irq == 0) - return -1; - - if (sub_handle == 0) - ret = index = msi_alloc_remapped_irq(dev, irq, nvec); - else - ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); - - if (ret < 0) - goto error; - - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) - goto error; - - sub_handle += 1; - irq += 1; - } - - return 0; - -error: - irq_free_hwirq(irq); - return ret; -} - -static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, - int nvec, int type) -{ - if (type == PCI_CAP_ID_MSI) - return do_setup_msi_irqs(dev, nvec); - else - return do_setup_msix_irqs(dev, nvec); -} - -static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) -{ - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. - */ - io_apic_eoi(apic, pin); -} - static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; - x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; - x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; - x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; - x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; - x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; - x86_msi.compose_msi_msg = compose_remapped_msi_msg; } static __init int setup_nointremap(char *str) @@ -254,113 +135,48 @@ int __init irq_remap_enable_fault_handling(void) return remap_ops->enable_faulting(); } -int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - if (!remap_ops->setup_ioapic_entry) - return -ENODEV; - - return remap_ops->setup_ioapic_entry(irq, entry, destination, - vector, attr); -} - -static int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) -{ - if (!config_enabled(CONFIG_SMP) || !remap_ops->set_affinity) - return 0; - - return remap_ops->set_affinity(data, mask, force); -} - -void free_remapped_irq(int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - if (irq_remapped(cfg) && remap_ops->free_irq) - remap_ops->free_irq(irq); -} - -void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - if (!irq_remapped(cfg)) - native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); - else if (remap_ops->compose_msi_msg) - remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); -} - -static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) -{ - if (!remap_ops->msi_alloc_irq) - return -ENODEV; - - return remap_ops->msi_alloc_irq(pdev, irq, nvec); -} - -static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) -{ - if (!remap_ops->msi_setup_irq) - return -ENODEV; - - return remap_ops->msi_setup_irq(pdev, irq, index, sub_handle); -} - -int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) -{ - int ret; - - if (!remap_ops->alloc_hpet_msi) - return -ENODEV; - - ret = remap_ops->alloc_hpet_msi(irq, id); - if (ret) - return -EINVAL; - - return default_setup_hpet_msi(irq, id); -} - void panic_if_irq_remap(const char *msg) { if (irq_remapping_enabled) panic(msg); } -static void ir_ack_apic_edge(struct irq_data *data) +void ir_ack_apic_edge(struct irq_data *data) { ack_APIC_irq(); } -static void ir_ack_apic_level(struct irq_data *data) +/** + * irq_remapping_get_ir_irq_domain - Get the irqdomain associated with the IOMMU + * device serving request @info + * @info: interrupt allocation information, used to identify the IOMMU device + * + * It's used to get parent irqdomain for HPET and IOAPIC irqdomains. + * Returns pointer to IRQ domain, or NULL on failure. + */ +struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) { - ack_APIC_irq(); - eoi_ioapic_irq(data->irq, irqd_cfg(data)); -} + if (!remap_ops || !remap_ops->get_ir_irq_domain) + return NULL; -static void ir_print_prefix(struct irq_data *data, struct seq_file *p) -{ - seq_printf(p, " IR-%s", data->chip->name); + return remap_ops->get_ir_irq_domain(info); } -void irq_remap_modify_chip_defaults(struct irq_chip *chip) +/** + * irq_remapping_get_irq_domain - Get the irqdomain serving the request @info + * @info: interrupt allocation information, used to identify the IOMMU device + * + * There will be one PCI MSI/MSIX irqdomain associated with each interrupt + * remapping device, so this interface is used to retrieve the PCI MSI/MSIX + * irqdomain serving request @info. + * Returns pointer to IRQ domain, or NULL on failure. + */ +struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info) { - chip->irq_print_chip = ir_print_prefix; - chip->irq_ack = ir_ack_apic_edge; - chip->irq_eoi = ir_ack_apic_level; - chip->irq_set_affinity = x86_io_apic_ops.set_affinity; -} + if (!remap_ops || !remap_ops->get_irq_domain) + return NULL; -bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) -{ - if (!irq_remapped(cfg)) - return false; - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); - return true; + return remap_ops->get_irq_domain(info); } diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 7c70cc29ffe6..91d5a119956a 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -24,12 +24,10 @@ #ifdef CONFIG_IRQ_REMAP -struct IO_APIC_route_entry; -struct io_apic_irq_attr; struct irq_data; -struct cpumask; -struct pci_dev; struct msi_msg; +struct irq_domain; +struct irq_alloc_info; extern int irq_remap_broken; extern int disable_sourceid_checking; @@ -52,36 +50,18 @@ struct irq_remap_ops { /* Enable fault handling */ int (*enable_faulting)(void); - /* IO-APIC setup routine */ - int (*setup_ioapic_entry)(int irq, struct IO_APIC_route_entry *, - unsigned int, int, - struct io_apic_irq_attr *); + /* Get the irqdomain associated the IOMMU device */ + struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *); - /* Set the CPU affinity of a remapped interrupt */ - int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, - bool force); - - /* Free an IRQ */ - int (*free_irq)(int); - - /* Create MSI msg to use for interrupt remapping */ - void (*compose_msi_msg)(struct pci_dev *, - unsigned int, unsigned int, - struct msi_msg *, u8); - - /* Allocate remapping resources for MSI */ - int (*msi_alloc_irq)(struct pci_dev *, int, int); - - /* Setup the remapped MSI irq */ - int (*msi_setup_irq)(struct pci_dev *, unsigned int, int, int); - - /* Setup interrupt remapping for an HPET MSI */ - int (*alloc_hpet_msi)(unsigned int, unsigned int); + /* Get the MSI irqdomain associated with the IOMMU device */ + struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *); }; extern struct irq_remap_ops intel_irq_remap_ops; extern struct irq_remap_ops amd_iommu_irq_ops; +extern void ir_ack_apic_edge(struct irq_data *data); + #else /* CONFIG_IRQ_REMAP */ #define irq_remapping_enabled 0 diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 7dc93aa004c8..312ffd3d0017 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -173,7 +173,7 @@ static void unmap_switcher(void) bool lguest_address_ok(const struct lguest *lg, unsigned long addr, unsigned long len) { - return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); + return addr+len <= lg->pfn_limit * PAGE_SIZE && (addr+len >= addr); } /* diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 5e7559be222a..eb934b0242e0 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -20,7 +20,7 @@ #include "lg.h" /* Allow Guests to use a non-128 (ie. non-Linux) syscall trap. */ -static unsigned int syscall_vector = SYSCALL_VECTOR; +static unsigned int syscall_vector = IA32_SYSCALL_VECTOR; module_param(syscall_vector, uint, 0444); /* The address of the interrupt handler is split into two bits: */ @@ -333,8 +333,8 @@ void set_interrupt(struct lg_cpu *cpu, unsigned int irq) */ static bool could_be_syscall(unsigned int num) { - /* Normal Linux SYSCALL_VECTOR or reserved vector? */ - return num == SYSCALL_VECTOR || num == syscall_vector; + /* Normal Linux IA32_SYSCALL_VECTOR or reserved vector? */ + return num == IA32_SYSCALL_VECTOR || num == syscall_vector; } /* The syscall vector it wants must be unused by Host. */ @@ -351,7 +351,7 @@ bool check_syscall_vector(struct lguest *lg) int init_interrupts(void) { /* If they want some strange system call vector, reserve it now */ - if (syscall_vector != SYSCALL_VECTOR) { + if (syscall_vector != IA32_SYSCALL_VECTOR) { if (test_bit(syscall_vector, used_vectors) || vector_used_by_percpu_irq(syscall_vector)) { printk(KERN_ERR "lg: couldn't reserve syscall %u\n", @@ -366,7 +366,7 @@ int init_interrupts(void) void free_interrupts(void) { - if (syscall_vector != SYSCALL_VECTOR) + if (syscall_vector != IA32_SYSCALL_VECTOR) clear_bit(syscall_vector, used_vectors); } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 63953477a07c..eff7bdd7731d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -429,9 +429,11 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, /* blk-mq request-based interface */ *__clone = blk_get_request(bdev_get_queue(bdev), rq_data_dir(rq), GFP_ATOMIC); - if (IS_ERR(*__clone)) + if (IS_ERR(*__clone)) { /* ENOMEM, requeue */ + clear_mapinfo(m, map_context); return r; + } (*__clone)->bio = (*__clone)->biotail = NULL; (*__clone)->rq_disk = bdev->bd_disk; (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d9b00b8565c6..16ba55ad7089 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -820,6 +820,12 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args) } EXPORT_SYMBOL(dm_consume_args); +static bool __table_type_request_based(unsigned table_type) +{ + return (table_type == DM_TYPE_REQUEST_BASED || + table_type == DM_TYPE_MQ_REQUEST_BASED); +} + static int dm_table_set_type(struct dm_table *t) { unsigned i; @@ -852,8 +858,7 @@ static int dm_table_set_type(struct dm_table *t) * Determine the type from the live device. * Default to bio-based if device is new. */ - if (live_md_type == DM_TYPE_REQUEST_BASED || - live_md_type == DM_TYPE_MQ_REQUEST_BASED) + if (__table_type_request_based(live_md_type)) request_based = 1; else bio_based = 1; @@ -903,7 +908,7 @@ static int dm_table_set_type(struct dm_table *t) } t->type = DM_TYPE_MQ_REQUEST_BASED; - } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) { + } else if (list_empty(devices) && __table_type_request_based(live_md_type)) { /* inherit live MD type */ t->type = live_md_type; @@ -925,10 +930,7 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t) bool dm_table_request_based(struct dm_table *t) { - unsigned table_type = dm_table_get_type(t); - - return (table_type == DM_TYPE_REQUEST_BASED || - table_type == DM_TYPE_MQ_REQUEST_BASED); + return __table_type_request_based(dm_table_get_type(t)); } bool dm_table_mq_request_based(struct dm_table *t) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a930b72314ac..2caf492890d6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1082,13 +1082,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) dm_put(md); } -static void free_rq_clone(struct request *clone, bool must_be_mapped) +static void free_rq_clone(struct request *clone) { struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; - WARN_ON_ONCE(must_be_mapped && !clone->q); - blk_rq_unprep_clone(clone); if (md->type == DM_TYPE_MQ_REQUEST_BASED) @@ -1132,7 +1130,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - free_rq_clone(clone, true); + free_rq_clone(clone); if (!rq->q->mq_ops) blk_end_request_all(rq, error); else @@ -1151,7 +1149,7 @@ static void dm_unprep_request(struct request *rq) } if (clone) - free_rq_clone(clone, false); + free_rq_clone(clone); } /* @@ -1164,6 +1162,7 @@ static void old_requeue_request(struct request *rq) spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, rq); + blk_run_queue_async(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -1724,8 +1723,7 @@ static int dm_merge_bvec(struct request_queue *q, struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; - sector_t max_sectors; - int max_size = 0; + sector_t max_sectors, max_size = 0; if (unlikely(!map)) goto out; @@ -1740,8 +1738,16 @@ static int dm_merge_bvec(struct request_queue *q, max_sectors = min(max_io_len(bvm->bi_sector, ti), (sector_t) queue_max_sectors(q)); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */ - max_size = 0; + + /* + * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t + * to the targets' merge function since it holds sectors not bytes). + * Just doing this as an interim fix for stable@ because the more + * comprehensive cleanup of switching to sector_t will impact every + * DM target that implements a ->merge hook. + */ + if (max_size > INT_MAX) + max_size = INT_MAX; /* * merge_bvec_fn() returns number of bytes @@ -1749,7 +1755,7 @@ static int dm_merge_bvec(struct request_queue *q, * max is precomputed maximal io size */ if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, max_size); + max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); /* * If the target doesn't support merge method and some of the devices * provided their merge_bvec method (we know this by looking for the @@ -1971,8 +1977,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, dm_kill_unmapped_request(rq, r); return r; } - if (IS_ERR(clone)) - return DM_MAPIO_REQUEUE; + if (r != DM_MAPIO_REMAPPED) + return r; if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { /* -ENOMEM */ ti->type->release_clone_rq(clone); @@ -2753,13 +2759,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) { /* clone request is allocated at the end of the pdu */ tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io); - if (!clone_rq(rq, md, tio, GFP_ATOMIC)) - return BLK_MQ_RQ_QUEUE_BUSY; + (void) clone_rq(rq, md, tio, GFP_ATOMIC); queue_kthread_work(&md->kworker, &tio->work); } else { /* Direct call is fine since .queue_rq allows allocations */ - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) - dm_requeue_unmapped_original_request(md, rq); + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { + /* Undo dm_start_request() before requeuing */ + rq_completed(md, rq_data_dir(rq), false); + return BLK_MQ_RQ_QUEUE_BUSY; + } } return BLK_MQ_RQ_QUEUE_OK; diff --git a/drivers/md/md.c b/drivers/md/md.c index 593a02476c78..27506302eb7a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4211,12 +4211,12 @@ action_store(struct mddev *mddev, const char *page, size_t len) if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (cmd_match(page, "frozen")) - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - else - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { + if (cmd_match(page, "frozen")) + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + else + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); @@ -4229,16 +4229,17 @@ action_store(struct mddev *mddev, const char *page, size_t len) test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return -EBUSY; else if (cmd_match(page, "resync")) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); else if (cmd_match(page, "recover")) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } else if (cmd_match(page, "reshape")) { int err; if (mddev->pers->start_reshape == NULL) return -EINVAL; err = mddev_lock(mddev); if (!err) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); mddev_unlock(mddev); } @@ -4250,6 +4251,7 @@ action_store(struct mddev *mddev, const char *page, size_t len) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); else if (!cmd_match(page, "repair")) return -EINVAL; + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b9f2b9cc6060..553d54b87052 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) static bool stripe_can_batch(struct stripe_head *sh) { return test_bit(STRIPE_BATCH_READY, &sh->state) && + !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && is_full_stripe_write(sh); } @@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh < IO_THRESHOLD) md_wakeup_thread(conf->mddev->thread); + if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { + int seq = sh->bm_seq; + if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && + sh->batch_head->bm_seq > seq) + seq = sh->batch_head->bm_seq; + set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); + sh->batch_head->bm_seq = seq; + } + atomic_inc(&sh->count); unlock_out: unlock_two_stripes(head, sh); @@ -2987,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", (unsigned long long)(*bip)->bi_iter.bi_sector, (unsigned long long)sh->sector, dd_idx); - spin_unlock_irq(&sh->stripe_lock); if (conf->mddev->bitmap && firstwrite) { + /* Cannot hold spinlock over bitmap_startwrite, + * but must ensure this isn't added to a batch until + * we have added to the bitmap and set bm_seq. + * So set STRIPE_BITMAP_PENDING to prevent + * batching. + * If multiple add_stripe_bio() calls race here they + * much all set STRIPE_BITMAP_PENDING. So only the first one + * to complete "bitmap_startwrite" gets to set + * STRIPE_BIT_DELAY. This is important as once a stripe + * is added to a batch, STRIPE_BIT_DELAY cannot be changed + * any more. + */ + set_bit(STRIPE_BITMAP_PENDING, &sh->state); + spin_unlock_irq(&sh->stripe_lock); bitmap_startwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS, 0); - sh->bm_seq = conf->seq_flush+1; - set_bit(STRIPE_BIT_DELAY, &sh->state); + spin_lock_irq(&sh->stripe_lock); + clear_bit(STRIPE_BITMAP_PENDING, &sh->state); + if (!sh->batch_head) { + sh->bm_seq = conf->seq_flush+1; + set_bit(STRIPE_BIT_DELAY, &sh->state); + } } + spin_unlock_irq(&sh->stripe_lock); if (stripe_can_batch(sh)) stripe_add_to_batch_list(conf, sh); @@ -3392,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh, set_bit(STRIPE_HANDLE, &sh->state); } +static void break_stripe_batch_list(struct stripe_head *head_sh, + unsigned long handle_flags); /* handle_stripe_clean_event * any written block on an uptodate or failed drive can be returned. * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but @@ -3405,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf, int discard_pending = 0; struct stripe_head *head_sh = sh; bool do_endio = false; - int wakeup_nr = 0; for (i = disks; i--; ) if (sh->dev[i].written) { @@ -3494,44 +3523,8 @@ unhash: if (atomic_dec_and_test(&conf->pending_full_writes)) md_wakeup_thread(conf->mddev->thread); - if (!head_sh->batch_head || !do_endio) - return; - for (i = 0; i < head_sh->disks; i++) { - if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) - wakeup_nr++; - } - while (!list_empty(&head_sh->batch_list)) { - int i; - sh = list_first_entry(&head_sh->batch_list, - struct stripe_head, batch_list); - list_del_init(&sh->batch_list); - - set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, - head_sh->state & ~((1 << STRIPE_ACTIVE) | - (1 << STRIPE_PREREAD_ACTIVE) | - STRIPE_EXPAND_SYNC_FLAG)); - sh->check_state = head_sh->check_state; - sh->reconstruct_state = head_sh->reconstruct_state; - for (i = 0; i < sh->disks; i++) { - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wakeup_nr++; - sh->dev[i].flags = head_sh->dev[i].flags; - } - - spin_lock_irq(&sh->stripe_lock); - sh->batch_head = NULL; - spin_unlock_irq(&sh->stripe_lock); - if (sh->state & STRIPE_EXPAND_SYNC_FLAG) - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); - } - - spin_lock_irq(&head_sh->stripe_lock); - head_sh->batch_head = NULL; - spin_unlock_irq(&head_sh->stripe_lock); - wake_up_nr(&conf->wait_for_overlap, wakeup_nr); - if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG) - set_bit(STRIPE_HANDLE, &head_sh->state); + if (head_sh->batch_head && do_endio) + break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); } static void handle_stripe_dirtying(struct r5conf *conf, @@ -4172,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) static int clear_batch_ready(struct stripe_head *sh) { + /* Return '1' if this is a member of batch, or + * '0' if it is a lone stripe or a head which can now be + * handled. + */ struct stripe_head *tmp; if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) - return 0; + return (sh->batch_head && sh->batch_head != sh); spin_lock(&sh->stripe_lock); if (!sh->batch_head) { spin_unlock(&sh->stripe_lock); @@ -4202,38 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh) return 0; } -static void check_break_stripe_batch_list(struct stripe_head *sh) +static void break_stripe_batch_list(struct stripe_head *head_sh, + unsigned long handle_flags) { - struct stripe_head *head_sh, *next; + struct stripe_head *sh, *next; int i; - - if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) - return; - - head_sh = sh; + int do_wakeup = 0; list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { list_del_init(&sh->batch_list); - set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, - head_sh->state & ~((1 << STRIPE_ACTIVE) | - (1 << STRIPE_PREREAD_ACTIVE) | - (1 << STRIPE_DEGRADED) | - STRIPE_EXPAND_SYNC_FLAG)); + WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | + (1 << STRIPE_SYNCING) | + (1 << STRIPE_REPLACED) | + (1 << STRIPE_PREREAD_ACTIVE) | + (1 << STRIPE_DELAYED) | + (1 << STRIPE_BIT_DELAY) | + (1 << STRIPE_FULL_WRITE) | + (1 << STRIPE_BIOFILL_RUN) | + (1 << STRIPE_COMPUTE_RUN) | + (1 << STRIPE_OPS_REQ_PENDING) | + (1 << STRIPE_DISCARD) | + (1 << STRIPE_BATCH_READY) | + (1 << STRIPE_BATCH_ERR) | + (1 << STRIPE_BITMAP_PENDING))); + WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | + (1 << STRIPE_REPLACED))); + + set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | + (1 << STRIPE_DEGRADED)), + head_sh->state & (1 << STRIPE_INSYNC)); + sh->check_state = head_sh->check_state; sh->reconstruct_state = head_sh->reconstruct_state; - for (i = 0; i < sh->disks; i++) + for (i = 0; i < sh->disks; i++) { + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + do_wakeup = 1; sh->dev[i].flags = head_sh->dev[i].flags & (~((1 << R5_WriteError) | (1 << R5_Overlap))); - + } spin_lock_irq(&sh->stripe_lock); sh->batch_head = NULL; spin_unlock_irq(&sh->stripe_lock); - - set_bit(STRIPE_HANDLE, &sh->state); + if (handle_flags == 0 || + sh->state & handle_flags) + set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); } + spin_lock_irq(&head_sh->stripe_lock); + head_sh->batch_head = NULL; + spin_unlock_irq(&head_sh->stripe_lock); + for (i = 0; i < head_sh->disks; i++) + if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) + do_wakeup = 1; + if (head_sh->state & handle_flags) + set_bit(STRIPE_HANDLE, &head_sh->state); + + if (do_wakeup) + wake_up(&head_sh->raid_conf->wait_for_overlap); } static void handle_stripe(struct stripe_head *sh) @@ -4258,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh) return; } - check_break_stripe_batch_list(sh); + if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) + break_stripe_batch_list(sh, 0); if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { spin_lock(&sh->stripe_lock); @@ -4312,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh) if (s.failed > conf->max_degraded) { sh->check_state = 0; sh->reconstruct_state = 0; + break_stripe_batch_list(sh, 0); if (s.to_read+s.to_write+s.written) handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); if (s.syncing + s.replacing) diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 7dc0dd86074b..896d603ad0da 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -337,9 +337,12 @@ enum { STRIPE_ON_RELEASE_LIST, STRIPE_BATCH_READY, STRIPE_BATCH_ERR, + STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add + * to batch yet. + */ }; -#define STRIPE_EXPAND_SYNC_FLAG \ +#define STRIPE_EXPAND_SYNC_FLAGS \ ((1 << STRIPE_EXPAND_SOURCE) |\ (1 << STRIPE_EXPAND_READY) |\ (1 << STRIPE_EXPANDING) |\ diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c index ae498b53ee40..46e3840c7a37 100644 --- a/drivers/mfd/da9052-core.c +++ b/drivers/mfd/da9052-core.c @@ -433,6 +433,10 @@ EXPORT_SYMBOL_GPL(da9052_adc_read_temp); static const struct mfd_cell da9052_subdev_info[] = { { .name = "da9052-regulator", + .id = 0, + }, + { + .name = "da9052-regulator", .id = 1, }, { @@ -484,10 +488,6 @@ static const struct mfd_cell da9052_subdev_info[] = { .id = 13, }, { - .name = "da9052-regulator", - .id = 14, - }, - { .name = "da9052-onkey", }, { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index a3b0f7a0c61e..1f82a04ce01a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1774,7 +1774,7 @@ struct bnx2x { int stats_state; /* used for synchronization of concurrent threads statistics handling */ - struct mutex stats_lock; + struct semaphore stats_lock; /* used by dmae command loader */ struct dmae_command stats_dmae; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index fd52ce95127e..33501bcddc48 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12054,7 +12054,7 @@ static int bnx2x_init_bp(struct bnx2x *bp) mutex_init(&bp->port.phy_mutex); mutex_init(&bp->fw_mb_mutex); mutex_init(&bp->drv_info_mutex); - mutex_init(&bp->stats_lock); + sema_init(&bp->stats_lock, 1); bp->drv_info_mng_owner = false; INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task); @@ -13690,9 +13690,10 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) cancel_delayed_work_sync(&bp->sp_task); cancel_delayed_work_sync(&bp->period_task); - mutex_lock(&bp->stats_lock); - bp->stats_state = STATS_STATE_DISABLED; - mutex_unlock(&bp->stats_lock); + if (!down_timeout(&bp->stats_lock, HZ / 10)) { + bp->stats_state = STATS_STATE_DISABLED; + up(&bp->stats_lock); + } bnx2x_save_statistics(bp); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index 266b055c2360..69d699f0730a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -1372,19 +1372,23 @@ void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event) * that context in case someone is in the middle of a transition. * For other events, wait a bit until lock is taken. */ - if (!mutex_trylock(&bp->stats_lock)) { + if (down_trylock(&bp->stats_lock)) { if (event == STATS_EVENT_UPDATE) return; DP(BNX2X_MSG_STATS, "Unlikely stats' lock contention [event %d]\n", event); - mutex_lock(&bp->stats_lock); + if (unlikely(down_timeout(&bp->stats_lock, HZ / 10))) { + BNX2X_ERR("Failed to take stats lock [event %d]\n", + event); + return; + } } bnx2x_stats_stm[state][event].action(bp); bp->stats_state = bnx2x_stats_stm[state][event].next_state; - mutex_unlock(&bp->stats_lock); + up(&bp->stats_lock); if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp)) DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n", @@ -1970,7 +1974,11 @@ int bnx2x_stats_safe_exec(struct bnx2x *bp, /* Wait for statistics to end [while blocking further requests], * then run supplied function 'safely'. */ - mutex_lock(&bp->stats_lock); + rc = down_timeout(&bp->stats_lock, HZ / 10); + if (unlikely(rc)) { + BNX2X_ERR("Failed to take statistics lock for safe execution\n"); + goto out_no_lock; + } bnx2x_stats_comp(bp); while (bp->stats_pending && cnt--) @@ -1988,7 +1996,7 @@ out: /* No need to restart statistics - if they're enabled, the timer * will restart the statistics. */ - mutex_unlock(&bp->stats_lock); - + up(&bp->stats_lock); +out_no_lock: return rc; } diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 594a2ab36d31..68f3c13c9ef6 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -2414,7 +2414,7 @@ bfa_ioc_boot(struct bfa_ioc *ioc, enum bfi_fwboot_type boot_type, if (status == BFA_STATUS_OK) bfa_ioc_lpu_start(ioc); else - bfa_nw_iocpf_timeout(ioc); + bfa_fsm_send_event(&ioc->iocpf, IOCPF_E_TIMEOUT); return status; } @@ -3029,7 +3029,7 @@ bfa_ioc_poll_fwinit(struct bfa_ioc *ioc) } if (ioc->iocpf.poll_time >= BFA_IOC_TOV) { - bfa_nw_iocpf_timeout(ioc); + bfa_fsm_send_event(&ioc->iocpf, IOCPF_E_TIMEOUT); } else { ioc->iocpf.poll_time += BFA_IOC_POLL_TOV; mod_timer(&ioc->iocpf_timer, jiffies + diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 37072a83f9d6..caae6cb2bc1a 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3701,10 +3701,6 @@ bnad_pci_probe(struct pci_dev *pdev, setup_timer(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout, ((unsigned long)bnad)); - /* Now start the timer before calling IOC */ - mod_timer(&bnad->bna.ioceth.ioc.iocpf_timer, - jiffies + msecs_to_jiffies(BNA_IOC_TIMER_FREQ)); - /* * Start the chip * If the call back comes with error, we bail out. diff --git a/drivers/net/ethernet/brocade/bna/cna_fwimg.c b/drivers/net/ethernet/brocade/bna/cna_fwimg.c index ebf462d8082f..badea368bdc8 100644 --- a/drivers/net/ethernet/brocade/bna/cna_fwimg.c +++ b/drivers/net/ethernet/brocade/bna/cna_fwimg.c @@ -30,6 +30,7 @@ cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image, u32 *bfi_image_size, char *fw_name) { const struct firmware *fw; + u32 n; if (request_firmware(&fw, fw_name, &pdev->dev)) { pr_alert("Can't locate firmware %s\n", fw_name); @@ -40,6 +41,12 @@ cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image, *bfi_image_size = fw->size/sizeof(u32); bfi_fw = fw; + /* Convert loaded firmware to host order as it is stored in file + * as sequence of LE32 integers. + */ + for (n = 0; n < *bfi_image_size; n++) + le32_to_cpus(*bfi_image + n); + return *bfi_image; error: return NULL; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6dcbf850c1f..6f9ffb9026cd 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + int numa_node = dev_to_node(&adapter->pdev->dev); if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) return -ENOMEM; - cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), - eqo->affinity_mask); - + cpumask_set_cpu(cpumask_local_spread(i, numa_node), + eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index de7919322190..b9df0cbd0a38 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2084,12 +2084,8 @@ static void emac_ethtool_get_pauseparam(struct net_device *ndev, static int emac_get_regs_len(struct emac_instance *dev) { - if (emac_has_feature(dev, EMAC_FTR_EMAC4)) - return sizeof(struct emac_ethtool_regs_subhdr) + - EMAC4_ETHTOOL_REGS_SIZE(dev); - else return sizeof(struct emac_ethtool_regs_subhdr) + - EMAC_ETHTOOL_REGS_SIZE(dev); + sizeof(struct emac_regs); } static int emac_ethtool_get_regs_len(struct net_device *ndev) @@ -2114,15 +2110,15 @@ static void *emac_dump_regs(struct emac_instance *dev, void *buf) struct emac_ethtool_regs_subhdr *hdr = buf; hdr->index = dev->cell_index; - if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { + hdr->version = EMAC4SYNC_ETHTOOL_REGS_VER; + } else if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { hdr->version = EMAC4_ETHTOOL_REGS_VER; - memcpy_fromio(hdr + 1, dev->emacp, EMAC4_ETHTOOL_REGS_SIZE(dev)); - return (void *)(hdr + 1) + EMAC4_ETHTOOL_REGS_SIZE(dev); } else { hdr->version = EMAC_ETHTOOL_REGS_VER; - memcpy_fromio(hdr + 1, dev->emacp, EMAC_ETHTOOL_REGS_SIZE(dev)); - return (void *)(hdr + 1) + EMAC_ETHTOOL_REGS_SIZE(dev); } + memcpy_fromio(hdr + 1, dev->emacp, sizeof(struct emac_regs)); + return (void *)(hdr + 1) + sizeof(struct emac_regs); } static void emac_ethtool_get_regs(struct net_device *ndev, diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 67f342a9f65e..28df37420da9 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -461,10 +461,7 @@ struct emac_ethtool_regs_subhdr { }; #define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ - (dev)->rsrc_regs.start + 1) -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ - (dev)->rsrc_regs.start + 1) +#define EMAC4_ETHTOOL_REGS_VER 1 +#define EMAC4SYNC_ETHTOOL_REGS_VER 2 #endif /* __IBM_NEWEMAC_CORE_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 4f7dc044601e..529ef0594b90 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -714,8 +714,13 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EIO; - goto out_reset; + if (op == MLX4_CMD_NOP) { + err = -EBUSY; + goto out; + } else { + err = -EIO; + goto out_reset; + } } err = context->result; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32f5ec737472..cf467a9f6cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) { struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; int numa_node = priv->mdev->dev->numa_node; - int ret = 0; if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) return -ENOMEM; - ret = cpumask_set_cpu_local_first(ring_idx, numa_node, - ring->affinity_mask); - if (ret) - free_cpumask_var(ring->affinity_mask); - - return ret; + cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node), + ring->affinity_mask); + return 0; } static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f7bf312fb443..7bed3a88579f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->queue_index = queue_index; if (queue_index < priv->num_tx_rings_p_up) - cpumask_set_cpu_local_first(queue_index, - priv->mdev->dev->numa_node, - &ring->affinity_mask); + cpumask_set_cpu(cpumask_local_spread(queue_index, + priv->mdev->dev->numa_node), + &ring->affinity_mask); *pring = ring; return 0; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index e0c31e3947d1..6409a06bbdf6 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -3025,9 +3025,9 @@ netxen_sysfs_read_dimm(struct file *filp, struct kobject *kobj, u8 dw, rows, cols, banks, ranks; u32 val; - if (size != sizeof(struct netxen_dimm_cfg)) { + if (size < attr->size) { netdev_err(netdev, "Invalid size\n"); - return -1; + return -EINVAL; } memset(&dimm, 0, sizeof(struct netxen_dimm_cfg)); @@ -3137,7 +3137,7 @@ out: static struct bin_attribute bin_attr_dimm = { .attr = { .name = "dimm", .mode = (S_IRUGO | S_IWUSR) }, - .size = 0, + .size = sizeof(struct netxen_dimm_cfg), .read = netxen_sysfs_read_dimm, }; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index c0ad95d2f63d..809ea4610a77 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -224,12 +224,17 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx, } } -static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf) +static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs) { - if (rx_buf->page) { - put_page(rx_buf->page); - rx_buf->page = NULL; - } + do { + if (rx_buf->page) { + put_page(rx_buf->page); + rx_buf->page = NULL; + } + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--num_bufs); } /* Attempt to recycle the page if there is an RX recycle ring; the page can @@ -278,7 +283,7 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, /* If this is the last buffer in a page, unmap and free it. */ if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { efx_unmap_rx_buffer(rx_queue->efx, rx_buf); - efx_free_rx_buffer(rx_buf); + efx_free_rx_buffers(rx_queue, rx_buf, 1); } rx_buf->page = NULL; } @@ -304,10 +309,7 @@ static void efx_discard_rx_packet(struct efx_channel *channel, efx_recycle_rx_pages(channel, rx_buf, n_frags); - do { - efx_free_rx_buffer(rx_buf); - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--n_frags); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); } /** @@ -431,11 +433,10 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, skb = napi_get_frags(napi); if (unlikely(!skb)) { - while (n_frags--) { - put_page(rx_buf->page); - rx_buf->page = NULL; - rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); - } + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); return; } @@ -622,7 +623,10 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); if (unlikely(skb == NULL)) { - efx_free_rx_buffer(rx_buf); + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); return; } skb_record_rx_queue(skb, channel->rx_queue.core_index); @@ -661,8 +665,12 @@ void __efx_rx_packet(struct efx_channel *channel) * loopback layer, and free the rx_buf here */ if (unlikely(efx->loopback_selftest)) { + struct efx_rx_queue *rx_queue; + efx_loopback_rx_packet(efx, eh, rx_buf->len); - efx_free_rx_buffer(rx_buf); + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, + channel->rx_pkt_n_frags); goto out; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 2ac9552d1fa3..73bab983edd9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -117,6 +117,12 @@ struct stmmac_priv { int use_riwt; int irq_wake; spinlock_t ptp_lock; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_dir; + struct dentry *dbgfs_rings_status; + struct dentry *dbgfs_dma_cap; +#endif }; int stmmac_mdio_unregister(struct net_device *ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 05c146f718a3..2c5ce2baca87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -118,7 +118,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id); #ifdef CONFIG_DEBUG_FS static int stmmac_init_fs(struct net_device *dev); -static void stmmac_exit_fs(void); +static void stmmac_exit_fs(struct net_device *dev); #endif #define STMMAC_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x)) @@ -1916,7 +1916,7 @@ static int stmmac_release(struct net_device *dev) netif_carrier_off(dev); #ifdef CONFIG_DEBUG_FS - stmmac_exit_fs(); + stmmac_exit_fs(dev); #endif stmmac_release_ptp(priv); @@ -2508,8 +2508,6 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; -static struct dentry *stmmac_rings_status; -static struct dentry *stmmac_dma_cap; static void sysfs_display_ring(void *head, int size, int extend_desc, struct seq_file *seq) @@ -2648,36 +2646,39 @@ static const struct file_operations stmmac_dma_cap_fops = { static int stmmac_init_fs(struct net_device *dev) { - /* Create debugfs entries */ - stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL); + struct stmmac_priv *priv = netdev_priv(dev); + + /* Create per netdev entries */ + priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir); - if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) { - pr_err("ERROR %s, debugfs create directory failed\n", - STMMAC_RESOURCE_NAME); + if (!priv->dbgfs_dir || IS_ERR(priv->dbgfs_dir)) { + pr_err("ERROR %s/%s, debugfs create directory failed\n", + STMMAC_RESOURCE_NAME, dev->name); return -ENOMEM; } /* Entry to report DMA RX/TX rings */ - stmmac_rings_status = debugfs_create_file("descriptors_status", - S_IRUGO, stmmac_fs_dir, dev, - &stmmac_rings_status_fops); + priv->dbgfs_rings_status = + debugfs_create_file("descriptors_status", S_IRUGO, + priv->dbgfs_dir, dev, + &stmmac_rings_status_fops); - if (!stmmac_rings_status || IS_ERR(stmmac_rings_status)) { + if (!priv->dbgfs_rings_status || IS_ERR(priv->dbgfs_rings_status)) { pr_info("ERROR creating stmmac ring debugfs file\n"); - debugfs_remove(stmmac_fs_dir); + debugfs_remove_recursive(priv->dbgfs_dir); return -ENOMEM; } /* Entry to report the DMA HW features */ - stmmac_dma_cap = debugfs_create_file("dma_cap", S_IRUGO, stmmac_fs_dir, - dev, &stmmac_dma_cap_fops); + priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", S_IRUGO, + priv->dbgfs_dir, + dev, &stmmac_dma_cap_fops); - if (!stmmac_dma_cap || IS_ERR(stmmac_dma_cap)) { + if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) { pr_info("ERROR creating stmmac MMC debugfs file\n"); - debugfs_remove(stmmac_rings_status); - debugfs_remove(stmmac_fs_dir); + debugfs_remove_recursive(priv->dbgfs_dir); return -ENOMEM; } @@ -2685,11 +2686,11 @@ static int stmmac_init_fs(struct net_device *dev) return 0; } -static void stmmac_exit_fs(void) +static void stmmac_exit_fs(struct net_device *dev) { - debugfs_remove(stmmac_rings_status); - debugfs_remove(stmmac_dma_cap); - debugfs_remove(stmmac_fs_dir); + struct stmmac_priv *priv = netdev_priv(dev); + + debugfs_remove_recursive(priv->dbgfs_dir); } #endif /* CONFIG_DEBUG_FS */ @@ -3149,6 +3150,35 @@ err: __setup("stmmaceth=", stmmac_cmdline_opt); #endif /* MODULE */ +static int __init stmmac_init(void) +{ +#ifdef CONFIG_DEBUG_FS + /* Create debugfs main directory if it doesn't exist yet */ + if (!stmmac_fs_dir) { + stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL); + + if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) { + pr_err("ERROR %s, debugfs create directory failed\n", + STMMAC_RESOURCE_NAME); + + return -ENOMEM; + } + } +#endif + + return 0; +} + +static void __exit stmmac_exit(void) +{ +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(stmmac_fs_dir); +#endif +} + +module_init(stmmac_init) +module_exit(stmmac_exit) + MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet device driver"); MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c index fb276f64cd64..34a75cba3b73 100644 --- a/drivers/net/phy/amd-xgbe-phy.c +++ b/drivers/net/phy/amd-xgbe-phy.c @@ -755,6 +755,45 @@ static int amd_xgbe_phy_set_mode(struct phy_device *phydev, return ret; } +static bool amd_xgbe_phy_use_xgmii_mode(struct phy_device *phydev) +{ + if (phydev->autoneg == AUTONEG_ENABLE) { + if (phydev->advertising & ADVERTISED_10000baseKR_Full) + return true; + } else { + if (phydev->speed == SPEED_10000) + return true; + } + + return false; +} + +static bool amd_xgbe_phy_use_gmii_2500_mode(struct phy_device *phydev) +{ + if (phydev->autoneg == AUTONEG_ENABLE) { + if (phydev->advertising & ADVERTISED_2500baseX_Full) + return true; + } else { + if (phydev->speed == SPEED_2500) + return true; + } + + return false; +} + +static bool amd_xgbe_phy_use_gmii_mode(struct phy_device *phydev) +{ + if (phydev->autoneg == AUTONEG_ENABLE) { + if (phydev->advertising & ADVERTISED_1000baseKX_Full) + return true; + } else { + if (phydev->speed == SPEED_1000) + return true; + } + + return false; +} + static int amd_xgbe_phy_set_an(struct phy_device *phydev, bool enable, bool restart) { @@ -1235,11 +1274,11 @@ static int amd_xgbe_phy_config_init(struct phy_device *phydev) /* Set initial mode - call the mode setting routines * directly to insure we are properly configured */ - if (phydev->advertising & SUPPORTED_10000baseKR_Full) + if (amd_xgbe_phy_use_xgmii_mode(phydev)) ret = amd_xgbe_phy_xgmii_mode(phydev); - else if (phydev->advertising & SUPPORTED_1000baseKX_Full) + else if (amd_xgbe_phy_use_gmii_mode(phydev)) ret = amd_xgbe_phy_gmii_mode(phydev); - else if (phydev->advertising & SUPPORTED_2500baseX_Full) + else if (amd_xgbe_phy_use_gmii_2500_mode(phydev)) ret = amd_xgbe_phy_gmii_2500_mode(phydev); else ret = -EINVAL; diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 64c74c6a4828..b5dc59de094e 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -404,7 +404,7 @@ static struct phy_driver bcm7xxx_driver[] = { .name = "Broadcom BCM7425", .features = PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = 0, + .flags = PHY_IS_INTERNAL, .config_init = bcm7xxx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 496e02f961d3..00cb41e71312 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -47,7 +47,7 @@ #define PSF_TX 0x1000 #define EXT_EVENT 1 #define CAL_EVENT 7 -#define CAL_TRIGGER 7 +#define CAL_TRIGGER 1 #define DP83640_N_PINS 12 #define MII_DP83640_MICR 0x11 @@ -496,7 +496,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, else evnt |= EVNT_RISE; } + mutex_lock(&clock->extreg_lock); ext_write(0, phydev, PAGE5, PTP_EVNT, evnt); + mutex_unlock(&clock->extreg_lock); return 0; case PTP_CLK_REQ_PEROUT: @@ -532,6 +534,8 @@ static u8 status_frame_src[6] = { 0x08, 0x00, 0x17, 0x0B, 0x6B, 0x0F }; static void enable_status_frames(struct phy_device *phydev, bool on) { + struct dp83640_private *dp83640 = phydev->priv; + struct dp83640_clock *clock = dp83640->clock; u16 cfg0 = 0, ver; if (on) @@ -539,9 +543,13 @@ static void enable_status_frames(struct phy_device *phydev, bool on) ver = (PSF_PTPVER & VERSIONPTP_MASK) << VERSIONPTP_SHIFT; + mutex_lock(&clock->extreg_lock); + ext_write(0, phydev, PAGE5, PSF_CFG0, cfg0); ext_write(0, phydev, PAGE6, PSF_CFG1, ver); + mutex_unlock(&clock->extreg_lock); + if (!phydev->attached_dev) { pr_warn("expected to find an attached netdevice\n"); return; @@ -838,7 +846,7 @@ static void decode_rxts(struct dp83640_private *dp83640, list_del_init(&rxts->list); phy2rxts(phy_rxts, rxts); - spin_lock_irqsave(&dp83640->rx_queue.lock, flags); + spin_lock(&dp83640->rx_queue.lock); skb_queue_walk(&dp83640->rx_queue, skb) { struct dp83640_skb_info *skb_info; @@ -853,7 +861,7 @@ static void decode_rxts(struct dp83640_private *dp83640, break; } } - spin_unlock_irqrestore(&dp83640->rx_queue.lock, flags); + spin_unlock(&dp83640->rx_queue.lock); if (!shhwtstamps) list_add_tail(&rxts->list, &dp83640->rxts); @@ -1173,11 +1181,18 @@ static int dp83640_config_init(struct phy_device *phydev) if (clock->chosen && !list_empty(&clock->phylist)) recalibrate(clock); - else + else { + mutex_lock(&clock->extreg_lock); enable_broadcast(phydev, clock->page, 1); + mutex_unlock(&clock->extreg_lock); + } enable_status_frames(phydev, true); + + mutex_lock(&clock->extreg_lock); ext_write(0, phydev, PAGE4, PTP_CTL, PTP_ENABLE); + mutex_unlock(&clock->extreg_lock); + return 0; } diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c index 4ec9811f49c8..65efb1468988 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c @@ -511,11 +511,9 @@ static int brcmf_msgbuf_query_dcmd(struct brcmf_pub *drvr, int ifidx, msgbuf->rx_pktids, msgbuf->ioctl_resp_pktid); if (msgbuf->ioctl_resp_ret_len != 0) { - if (!skb) { - brcmf_err("Invalid packet id idx recv'd %d\n", - msgbuf->ioctl_resp_pktid); + if (!skb) return -EBADF; - } + memcpy(buf, skb->data, (len < msgbuf->ioctl_resp_ret_len) ? len : msgbuf->ioctl_resp_ret_len); } @@ -874,10 +872,8 @@ brcmf_msgbuf_process_txstatus(struct brcmf_msgbuf *msgbuf, void *buf) flowid -= BRCMF_NROF_H2D_COMMON_MSGRINGS; skb = brcmf_msgbuf_get_pktid(msgbuf->drvr->bus_if->dev, msgbuf->tx_pktids, idx); - if (!skb) { - brcmf_err("Invalid packet id idx recv'd %d\n", idx); + if (!skb) return; - } set_bit(flowid, msgbuf->txstatus_done_map); commonring = msgbuf->flowrings[flowid]; @@ -1156,6 +1152,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf) skb = brcmf_msgbuf_get_pktid(msgbuf->drvr->bus_if->dev, msgbuf->rx_pktids, idx); + if (!skb) + return; if (data_offset) skb_pull(skb, data_offset); diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig index ab019b45551b..f89f446e5c8a 100644 --- a/drivers/net/wireless/iwlwifi/Kconfig +++ b/drivers/net/wireless/iwlwifi/Kconfig @@ -21,6 +21,7 @@ config IWLWIFI Intel 7260 Wi-Fi Adapter Intel 3160 Wi-Fi Adapter Intel 7265 Wi-Fi Adapter + Intel 3165 Wi-Fi Adapter This driver uses the kernel's mac80211 subsystem. diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 36e786f0387b..74ad278116be 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -70,15 +70,14 @@ /* Highest firmware API version supported */ #define IWL7260_UCODE_API_MAX 13 -#define IWL3160_UCODE_API_MAX 13 /* Oldest version we won't warn about */ #define IWL7260_UCODE_API_OK 12 -#define IWL3160_UCODE_API_OK 12 +#define IWL3165_UCODE_API_OK 13 /* Lowest firmware API version supported */ #define IWL7260_UCODE_API_MIN 10 -#define IWL3160_UCODE_API_MIN 10 +#define IWL3165_UCODE_API_MIN 13 /* NVM versions */ #define IWL7260_NVM_VERSION 0x0a1d @@ -104,9 +103,6 @@ #define IWL3160_FW_PRE "iwlwifi-3160-" #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode" -#define IWL3165_FW_PRE "iwlwifi-3165-" -#define IWL3165_MODULE_FIRMWARE(api) IWL3165_FW_PRE __stringify(api) ".ucode" - #define IWL7265_FW_PRE "iwlwifi-7265-" #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode" @@ -248,8 +244,13 @@ static const struct iwl_ht_params iwl7265_ht_params = { const struct iwl_cfg iwl3165_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 3165", - .fw_name_pre = IWL3165_FW_PRE, + .fw_name_pre = IWL7265D_FW_PRE, IWL_DEVICE_7000, + /* sparse doens't like the re-assignment but it is safe */ +#ifndef __CHECKER__ + .ucode_api_ok = IWL3165_UCODE_API_OK, + .ucode_api_min = IWL3165_UCODE_API_MIN, +#endif .ht_params = &iwl7000_ht_params, .nvm_ver = IWL3165_NVM_VERSION, .nvm_calib_ver = IWL3165_TX_POWER_VERSION, @@ -325,6 +326,5 @@ const struct iwl_cfg iwl7265d_n_cfg = { MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL3160_UCODE_API_OK)); -MODULE_FIRMWARE(IWL3165_MODULE_FIRMWARE(IWL3160_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c index 41ff85de7334..21302b6f2bfd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,6 +32,7 @@ * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -748,6 +750,9 @@ void iwl_init_ht_hw_capab(const struct iwl_cfg *cfg, return; } + if (data->sku_cap_mimo_disabled) + rx_chains = 1; + ht_info->ht_supported = true; ht_info->cap = IEEE80211_HT_CAP_DSSSCCK40; diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h index 5234a0bf11e4..750c8c9ee70d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,6 +32,7 @@ * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -84,6 +86,7 @@ struct iwl_nvm_data { bool sku_cap_11ac_enable; bool sku_cap_amt_enable; bool sku_cap_ipan_enable; + bool sku_cap_mimo_disabled; u16 radio_cfg_type; u8 radio_cfg_step; diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 83903a5025c2..8e604a3931ca 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -32,7 +32,7 @@ * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -116,10 +116,11 @@ enum family_8000_nvm_offsets { /* SKU Capabilities (actual values from NVM definition) */ enum nvm_sku_bits { - NVM_SKU_CAP_BAND_24GHZ = BIT(0), - NVM_SKU_CAP_BAND_52GHZ = BIT(1), - NVM_SKU_CAP_11N_ENABLE = BIT(2), - NVM_SKU_CAP_11AC_ENABLE = BIT(3), + NVM_SKU_CAP_BAND_24GHZ = BIT(0), + NVM_SKU_CAP_BAND_52GHZ = BIT(1), + NVM_SKU_CAP_11N_ENABLE = BIT(2), + NVM_SKU_CAP_11AC_ENABLE = BIT(3), + NVM_SKU_CAP_MIMO_DISABLE = BIT(5), }; /* @@ -368,6 +369,11 @@ static void iwl_init_vht_hw_capab(const struct iwl_cfg *cfg, if (cfg->ht_params->ldpc) vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC; + if (data->sku_cap_mimo_disabled) { + num_rx_ants = 1; + num_tx_ants = 1; + } + if (num_tx_ants > 1) vht_cap->cap |= IEEE80211_VHT_CAP_TXSTBC; else @@ -465,7 +471,7 @@ static int iwl_get_radio_cfg(const struct iwl_cfg *cfg, const __le16 *nvm_sw, if (cfg->device_family != IWL_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + RADIO_CFG); - return le32_to_cpup((__le32 *)(nvm_sw + RADIO_CFG_FAMILY_8000)); + return le32_to_cpup((__le32 *)(phy_sku + RADIO_CFG_FAMILY_8000)); } @@ -527,6 +533,10 @@ static void iwl_set_hw_address_family_8000(struct device *dev, const u8 *hw_addr; if (mac_override) { + static const u8 reserved_mac[] = { + 0x02, 0xcc, 0xaa, 0xff, 0xee, 0x00 + }; + hw_addr = (const u8 *)(mac_override + MAC_ADDRESS_OVERRIDE_FAMILY_8000); @@ -538,7 +548,12 @@ static void iwl_set_hw_address_family_8000(struct device *dev, data->hw_addr[4] = hw_addr[5]; data->hw_addr[5] = hw_addr[4]; - if (is_valid_ether_addr(data->hw_addr)) + /* + * Force the use of the OTP MAC address in case of reserved MAC + * address in the NVM, or if address is given but invalid. + */ + if (is_valid_ether_addr(data->hw_addr) && + memcmp(reserved_mac, hw_addr, ETH_ALEN) != 0) return; IWL_ERR_DEV(dev, @@ -610,6 +625,7 @@ iwl_parse_nvm_data(struct device *dev, const struct iwl_cfg *cfg, data->sku_cap_11n_enable = false; data->sku_cap_11ac_enable = data->sku_cap_11n_enable && (sku & NVM_SKU_CAP_11AC_ENABLE); + data->sku_cap_mimo_disabled = sku & NVM_SKU_CAP_MIMO_DISABLE; data->n_hw_addrs = iwl_get_n_hw_addrs(cfg, nvm_sw); diff --git a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c index d954591e0be5..6ac6de2af977 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c @@ -776,7 +776,7 @@ static int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id, struct iwl_host_cmd cmd = { .id = BT_CONFIG, .len = { sizeof(*bt_cmd), }, - .dataflags = { IWL_HCMD_DFL_NOCOPY, }, + .dataflags = { IWL_HCMD_DFL_DUP, }, .flags = CMD_ASYNC, }; struct iwl_mvm_sta *mvmsta; diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 1b1b2bf26819..4310cf102d78 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -1750,8 +1750,10 @@ static void iwl_mvm_query_netdetect_reasons(struct iwl_mvm *mvm, int i, j, n_matches, ret; fw_status = iwl_mvm_get_wakeup_status(mvm, vif); - if (!IS_ERR_OR_NULL(fw_status)) + if (!IS_ERR_OR_NULL(fw_status)) { reasons = le32_to_cpu(fw_status->wakeup_reasons); + kfree(fw_status); + } if (reasons & IWL_WOWLAN_WAKEUP_BY_RFKILL_DEASSERTED) wakeup.rfkill_release = true; @@ -1868,15 +1870,15 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) /* get the BSS vif pointer again */ vif = iwl_mvm_get_bss_vif(mvm); if (IS_ERR_OR_NULL(vif)) - goto out_unlock; + goto err; ret = iwl_trans_d3_resume(mvm->trans, &d3_status, test); if (ret) - goto out_unlock; + goto err; if (d3_status != IWL_D3_STATUS_ALIVE) { IWL_INFO(mvm, "Device was reset during suspend\n"); - goto out_unlock; + goto err; } /* query SRAM first in case we want event logging */ @@ -1902,7 +1904,8 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) goto out_iterate; } - out_unlock: +err: + iwl_mvm_free_nd(mvm); mutex_unlock(&mvm->mutex); out_iterate: @@ -1915,6 +1918,14 @@ out: /* return 1 to reconfigure the device */ set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status); set_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status); + + /* We always return 1, which causes mac80211 to do a reconfig + * with IEEE80211_RECONFIG_TYPE_RESTART. This type of + * reconfig calls iwl_mvm_restart_complete(), where we unref + * the IWL_MVM_REF_UCODE_DOWN, so we need to take the + * reference here. + */ + iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); return 1; } @@ -2021,7 +2032,6 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) __iwl_mvm_resume(mvm, true); rtnl_unlock(); iwl_abort_notification_waits(&mvm->notif_wait); - iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); ieee80211_restart_hw(mvm->hw); /* wait for restart and disconnect all interfaces */ diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 40265b9c66ae..dda9f7b5f342 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -3995,9 +3995,6 @@ static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw, if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_MLME)) return; - if (event->u.mlme.status == MLME_SUCCESS) - return; - trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_MLME); trig_mlme = (void *)trig->data; if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig)) diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 1c66297d82c0..2ea01238754e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -1263,11 +1263,13 @@ static void iwl_mvm_d0i3_exit_work(struct work_struct *wk) ieee80211_iterate_active_interfaces( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_d0i3_disconnect_iter, mvm); - - iwl_free_resp(&get_status_cmd); out: iwl_mvm_d0i3_enable_tx(mvm, qos_seq); + /* qos_seq might point inside resp_pkt, so free it only now */ + if (get_status_cmd.resp_pkt) + iwl_free_resp(&get_status_cmd); + /* the FW might have updated the regdomain */ iwl_mvm_update_changed_regdom(mvm); diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c index f9928f2c125f..33cd68ae7bf9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/iwlwifi/mvm/rs.c @@ -180,6 +180,9 @@ static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (iwl_mvm_vif_low_latency(mvmvif) && mvmsta->vif->p2p) return false; + if (mvm->nvm_data->sku_cap_mimo_disabled) + return false; + return true; } diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h index 01996c9d98a7..376b84e54ad7 100644 --- a/drivers/net/wireless/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/iwlwifi/pcie/internal.h @@ -1,7 +1,7 @@ /****************************************************************************** * - * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -320,7 +320,7 @@ struct iwl_trans_pcie { /*protect hw register */ spinlock_t reg_lock; - bool cmd_in_flight; + bool cmd_hold_nic_awake; bool ref_cmd_in_flight; /* protect ref counter */ diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 47bbf573fdc8..dc179094e6a0 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1049,9 +1049,11 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) iwl_pcie_rx_stop(trans); /* Power-down device's busmaster DMA clocks */ - iwl_write_prph(trans, APMG_CLK_DIS_REG, - APMG_CLK_VAL_DMA_CLK_RQT); - udelay(5); + if (trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) { + iwl_write_prph(trans, APMG_CLK_DIS_REG, + APMG_CLK_VAL_DMA_CLK_RQT); + udelay(5); + } } /* Make sure (redundant) we've released our request to stay awake */ @@ -1370,7 +1372,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent, spin_lock_irqsave(&trans_pcie->reg_lock, *flags); - if (trans_pcie->cmd_in_flight) + if (trans_pcie->cmd_hold_nic_awake) goto out; /* this bit wakes up the NIC */ @@ -1436,7 +1438,7 @@ static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans, */ __acquire(&trans_pcie->reg_lock); - if (trans_pcie->cmd_in_flight) + if (trans_pcie->cmd_hold_nic_awake) goto out; __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 06952aadfd7b..5ef8044c2ea3 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -1039,18 +1039,14 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans, iwl_trans_pcie_ref(trans); } - if (trans_pcie->cmd_in_flight) - return 0; - - trans_pcie->cmd_in_flight = true; - /* * wake up the NIC to make sure that the firmware will see the host * command - we will let the NIC sleep once all the host commands * returned. This needs to be done only on NICs that have * apmg_wake_up_wa set. */ - if (trans->cfg->base_params->apmg_wake_up_wa) { + if (trans->cfg->base_params->apmg_wake_up_wa && + !trans_pcie->cmd_hold_nic_awake) { __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) @@ -1064,10 +1060,10 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans, if (ret < 0) { __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); - trans_pcie->cmd_in_flight = false; IWL_ERR(trans, "Failed to wake NIC for hcmd\n"); return -EIO; } + trans_pcie->cmd_hold_nic_awake = true; } return 0; @@ -1085,15 +1081,14 @@ static int iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans) iwl_trans_pcie_unref(trans); } - if (WARN_ON(!trans_pcie->cmd_in_flight)) - return 0; - - trans_pcie->cmd_in_flight = false; + if (trans->cfg->base_params->apmg_wake_up_wa) { + if (WARN_ON(!trans_pcie->cmd_hold_nic_awake)) + return 0; - if (trans->cfg->base_params->apmg_wake_up_wa) + trans_pcie->cmd_hold_nic_awake = false; __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, - CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); - + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + } return 0; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 4de46aa61d95..0d2594395ffb 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1250,7 +1250,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, netdev_err(queue->vif->dev, "txreq.offset: %x, size: %u, end: %lu\n", txreq.offset, txreq.size, - (txreq.offset&~PAGE_MASK) + txreq.size); + (unsigned long)(txreq.offset&~PAGE_MASK) + txreq.size); xenvif_fatal_tx_err(queue->vif); break; } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 3d8dbf5f2d39..968787abf78d 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -34,6 +34,8 @@ struct backend_info { enum xenbus_state frontend_state; struct xenbus_watch hotplug_status_watch; u8 have_hotplug_status_watch:1; + + const char *hotplug_script; }; static int connect_rings(struct backend_info *be, struct xenvif_queue *queue); @@ -238,6 +240,7 @@ static int netback_remove(struct xenbus_device *dev) xenvif_free(be->vif); be->vif = NULL; } + kfree(be->hotplug_script); kfree(be); dev_set_drvdata(&dev->dev, NULL); return 0; @@ -255,6 +258,7 @@ static int netback_probe(struct xenbus_device *dev, struct xenbus_transaction xbt; int err; int sg; + const char *script; struct backend_info *be = kzalloc(sizeof(struct backend_info), GFP_KERNEL); if (!be) { @@ -347,6 +351,15 @@ static int netback_probe(struct xenbus_device *dev, if (err) pr_debug("Error writing multi-queue-max-queues\n"); + script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL); + if (IS_ERR(script)) { + err = PTR_ERR(script); + xenbus_dev_fatal(dev, err, "reading script"); + goto fail; + } + + be->hotplug_script = script; + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -379,22 +392,14 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) { struct backend_info *be = dev_get_drvdata(&xdev->dev); - char *val; - val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); - if (IS_ERR(val)) { - int err = PTR_ERR(val); - xenbus_dev_fatal(xdev, err, "reading script"); - return err; - } else { - if (add_uevent_var(env, "script=%s", val)) { - kfree(val); - return -ENOMEM; - } - kfree(val); - } + if (!be) + return 0; - if (!be || !be->vif) + if (add_uevent_var(env, "script=%s", be->hotplug_script)) + return -ENOMEM; + + if (!be->vif) return 0; return add_uevent_var(env, "vif=%s", be->vif->dev->name); @@ -793,6 +798,7 @@ static void connect(struct backend_info *be) goto err; } + queue->credit_bytes = credit_bytes; queue->remaining_credit = credit_bytes; queue->credit_usec = credit_usec; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 3f45afd4382e..e031c943286e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1698,6 +1698,7 @@ static void xennet_destroy_queues(struct netfront_info *info) if (netif_running(info->netdev)) napi_disable(&queue->napi); + del_timer_sync(&queue->rx_refill_timer); netif_napi_del(&queue->napi); } @@ -2102,9 +2103,6 @@ static const struct attribute_group xennet_dev_group = { static int xennet_remove(struct xenbus_device *dev) { struct netfront_info *info = dev_get_drvdata(&dev->dev); - unsigned int num_queues = info->netdev->real_num_tx_queues; - struct netfront_queue *queue = NULL; - unsigned int i = 0; dev_dbg(&dev->dev, "%s\n", dev->nodename); @@ -2112,16 +2110,7 @@ static int xennet_remove(struct xenbus_device *dev) unregister_netdev(info->netdev); - for (i = 0; i < num_queues; ++i) { - queue = &info->queues[i]; - del_timer_sync(&queue->rx_refill_timer); - } - - if (num_queues) { - kfree(info->queues); - info->queues = NULL; - } - + xennet_destroy_queues(info); xennet_free_netdev(info->netdev); return 0; diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index a94dd2c4183a..7eb4109a3df4 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c @@ -23,20 +23,11 @@ */ static DEFINE_SPINLOCK(ht_irq_lock); -struct ht_irq_cfg { - struct pci_dev *dev; - /* Update callback used to cope with buggy hardware */ - ht_irq_update_t *update; - unsigned pos; - unsigned idx; - struct ht_irq_msg msg; -}; - - void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) { struct ht_irq_cfg *cfg = irq_get_handler_data(irq); unsigned long flags; + spin_lock_irqsave(&ht_irq_lock, flags); if (cfg->msg.address_lo != msg->address_lo) { pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); @@ -55,6 +46,7 @@ void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) { struct ht_irq_cfg *cfg = irq_get_handler_data(irq); + *msg = cfg->msg; } @@ -86,7 +78,6 @@ void unmask_ht_irq(struct irq_data *data) */ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) { - struct ht_irq_cfg *cfg; int max_irq, pos, irq; unsigned long flags; u32 data; @@ -105,29 +96,9 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) if (idx > max_irq) return -EINVAL; - cfg = kmalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) - return -ENOMEM; - - cfg->dev = dev; - cfg->update = update; - cfg->pos = pos; - cfg->idx = 0x10 + (idx * 2); - /* Initialize msg to a value that will never match the first write. */ - cfg->msg.address_lo = 0xffffffff; - cfg->msg.address_hi = 0xffffffff; - - irq = irq_alloc_hwirq(dev_to_node(&dev->dev)); - if (!irq) { - kfree(cfg); - return -EBUSY; - } - irq_set_handler_data(irq, cfg); - - if (arch_setup_ht_irq(irq, dev) < 0) { - ht_destroy_irq(irq); - return -EBUSY; - } + irq = arch_setup_ht_irq(idx, pos, dev, update); + if (irq > 0) + dev_dbg(&dev->dev, "irq %d for HT\n", irq); return irq; } @@ -158,13 +129,6 @@ EXPORT_SYMBOL(ht_create_irq); */ void ht_destroy_irq(unsigned int irq) { - struct ht_irq_cfg *cfg; - - cfg = irq_get_handler_data(irq); - irq_set_chip(irq, NULL); - irq_set_handler_data(irq, NULL); - irq_free_hwirq(irq); - - kfree(cfg); + arch_teardown_ht_irq(irq); } EXPORT_SYMBOL(ht_destroy_irq); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c6dc1dfd25d5..2890ad7cf7c6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -819,13 +819,6 @@ static void quirk_amd_ioapic(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic); - -static void quirk_ioapic_rmw(struct pci_dev *dev) -{ - if (dev->devfn == 0 && dev->bus->number == 0) - sis_apic_bug = 1; -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_ANY_ID, quirk_ioapic_rmw); #endif /* CONFIG_X86_IO_APIC */ /* diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c index 4ad5c1a996e3..e406e3d8c1c7 100644 --- a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c @@ -643,7 +643,9 @@ static const struct cygnus_gpio_pin_range cygnus_gpio_pintable[] = { CYGNUS_PINRANGE(87, 104, 12), CYGNUS_PINRANGE(99, 102, 2), CYGNUS_PINRANGE(101, 90, 4), - CYGNUS_PINRANGE(105, 116, 10), + CYGNUS_PINRANGE(105, 116, 6), + CYGNUS_PINRANGE(111, 100, 2), + CYGNUS_PINRANGE(113, 122, 4), CYGNUS_PINRANGE(123, 11, 1), CYGNUS_PINRANGE(124, 38, 4), CYGNUS_PINRANGE(128, 43, 1), diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 82f691eeeec4..732ff757a95f 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1292,6 +1292,49 @@ static void chv_gpio_irq_unmask(struct irq_data *d) chv_gpio_irq_mask_unmask(d, false); } +static unsigned chv_gpio_irq_startup(struct irq_data *d) +{ + /* + * Check if the interrupt has been requested with 0 as triggering + * type. In that case it is assumed that the current values + * programmed to the hardware are used (e.g BIOS configured + * defaults). + * + * In that case ->irq_set_type() will never be called so we need to + * read back the values from hardware now, set correct flow handler + * and update mappings before the interrupt is being used. + */ + if (irqd_get_trigger_type(d) == IRQ_TYPE_NONE) { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct chv_pinctrl *pctrl = gpiochip_to_pinctrl(gc); + unsigned offset = irqd_to_hwirq(d); + int pin = chv_gpio_offset_to_pin(pctrl, offset); + irq_flow_handler_t handler; + unsigned long flags; + u32 intsel, value; + + intsel = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0)); + intsel &= CHV_PADCTRL0_INTSEL_MASK; + intsel >>= CHV_PADCTRL0_INTSEL_SHIFT; + + value = readl(chv_padreg(pctrl, pin, CHV_PADCTRL1)); + if (value & CHV_PADCTRL1_INTWAKECFG_LEVEL) + handler = handle_level_irq; + else + handler = handle_edge_irq; + + spin_lock_irqsave(&pctrl->lock, flags); + if (!pctrl->intr_lines[intsel]) { + __irq_set_handler_locked(d->irq, handler); + pctrl->intr_lines[intsel] = offset; + } + spin_unlock_irqrestore(&pctrl->lock, flags); + } + + chv_gpio_irq_unmask(d); + return 0; +} + static int chv_gpio_irq_type(struct irq_data *d, unsigned type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1357,6 +1400,7 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned type) static struct irq_chip chv_gpio_irqchip = { .name = "chv-gpio", + .irq_startup = chv_gpio_irq_startup, .irq_ack = chv_gpio_irq_ack, .irq_mask = chv_gpio_irq_mask, .irq_unmask = chv_gpio_irq_unmask, diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index edcd140e0899..a70a5fe79d44 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -569,7 +569,7 @@ static int meson_gpiolib_register(struct meson_pinctrl *pc) domain->chip.direction_output = meson_gpio_direction_output; domain->chip.get = meson_gpio_get; domain->chip.set = meson_gpio_set; - domain->chip.base = -1; + domain->chip.base = domain->data->pin_base; domain->chip.ngpio = domain->data->num_pins; domain->chip.can_sleep = false; domain->chip.of_node = domain->of_node; diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index 2f7ea6229880..9677807db364 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -876,13 +876,13 @@ static struct meson_domain_data meson8b_domain_data[] = { .banks = meson8b_banks, .num_banks = ARRAY_SIZE(meson8b_banks), .pin_base = 0, - .num_pins = 83, + .num_pins = 130, }, { .name = "ao-bank", .banks = meson8b_ao_banks, .num_banks = ARRAY_SIZE(meson8b_ao_banks), - .pin_base = 83, + .pin_base = 130, .num_pins = 16, }, }; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 9bb9ad6d4a1b..28f328136f0d 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2897,7 +2897,7 @@ static ssize_t hotkey_wakeup_reason_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_wakeup_reason); } -static DEVICE_ATTR_RO(hotkey_wakeup_reason); +static DEVICE_ATTR(wakeup_reason, S_IRUGO, hotkey_wakeup_reason_show, NULL); static void hotkey_wakeup_reason_notify_change(void) { @@ -2913,7 +2913,8 @@ static ssize_t hotkey_wakeup_hotunplug_complete_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_autosleep_ack); } -static DEVICE_ATTR_RO(hotkey_wakeup_hotunplug_complete); +static DEVICE_ATTR(wakeup_hotunplug_complete, S_IRUGO, + hotkey_wakeup_hotunplug_complete_show, NULL); static void hotkey_wakeup_hotunplug_complete_notify_change(void) { @@ -2978,8 +2979,8 @@ static struct attribute *hotkey_attributes[] __initdata = { &dev_attr_hotkey_enable.attr, &dev_attr_hotkey_bios_enabled.attr, &dev_attr_hotkey_bios_mask.attr, - &dev_attr_hotkey_wakeup_reason.attr, - &dev_attr_hotkey_wakeup_hotunplug_complete.attr, + &dev_attr_wakeup_reason.attr, + &dev_attr_wakeup_hotunplug_complete.attr, &dev_attr_hotkey_mask.attr, &dev_attr_hotkey_all_mask.attr, &dev_attr_hotkey_recommended_mask.attr, @@ -4393,12 +4394,13 @@ static ssize_t wan_enable_store(struct device *dev, attr, buf, count); } -static DEVICE_ATTR_RW(wan_enable); +static DEVICE_ATTR(wwan_enable, S_IWUSR | S_IRUGO, + wan_enable_show, wan_enable_store); /* --------------------------------------------------------------------- */ static struct attribute *wan_attributes[] = { - &dev_attr_wan_enable.attr, + &dev_attr_wwan_enable.attr, NULL }; @@ -8138,7 +8140,8 @@ static ssize_t fan_pwm1_enable_store(struct device *dev, return count; } -static DEVICE_ATTR_RW(fan_pwm1_enable); +static DEVICE_ATTR(pwm1_enable, S_IWUSR | S_IRUGO, + fan_pwm1_enable_show, fan_pwm1_enable_store); /* sysfs fan pwm1 ------------------------------------------------------ */ static ssize_t fan_pwm1_show(struct device *dev, @@ -8198,7 +8201,7 @@ static ssize_t fan_pwm1_store(struct device *dev, return (rc) ? rc : count; } -static DEVICE_ATTR_RW(fan_pwm1); +static DEVICE_ATTR(pwm1, S_IWUSR | S_IRUGO, fan_pwm1_show, fan_pwm1_store); /* sysfs fan fan1_input ------------------------------------------------ */ static ssize_t fan_fan1_input_show(struct device *dev, @@ -8215,7 +8218,7 @@ static ssize_t fan_fan1_input_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", speed); } -static DEVICE_ATTR_RO(fan_fan1_input); +static DEVICE_ATTR(fan1_input, S_IRUGO, fan_fan1_input_show, NULL); /* sysfs fan fan2_input ------------------------------------------------ */ static ssize_t fan_fan2_input_show(struct device *dev, @@ -8232,7 +8235,7 @@ static ssize_t fan_fan2_input_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", speed); } -static DEVICE_ATTR_RO(fan_fan2_input); +static DEVICE_ATTR(fan2_input, S_IRUGO, fan_fan2_input_show, NULL); /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */ static ssize_t fan_fan_watchdog_show(struct device_driver *drv, @@ -8265,8 +8268,8 @@ static DRIVER_ATTR(fan_watchdog, S_IWUSR | S_IRUGO, /* --------------------------------------------------------------------- */ static struct attribute *fan_attributes[] = { - &dev_attr_fan_pwm1_enable.attr, &dev_attr_fan_pwm1.attr, - &dev_attr_fan_fan1_input.attr, + &dev_attr_pwm1_enable.attr, &dev_attr_pwm1.attr, + &dev_attr_fan1_input.attr, NULL, /* for fan2_input */ NULL }; @@ -8400,7 +8403,7 @@ static int __init fan_init(struct ibm_init_struct *iibm) if (tp_features.second_fan) { /* attach second fan tachometer */ fan_attributes[ARRAY_SIZE(fan_attributes)-2] = - &dev_attr_fan_fan2_input.attr; + &dev_attr_fan2_input.attr; } rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj, &fan_attr_group); @@ -8848,7 +8851,7 @@ static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME); } -static DEVICE_ATTR_RO(thinkpad_acpi_pdev_name); +static DEVICE_ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL); /* --------------------------------------------------------------------- */ @@ -9390,8 +9393,7 @@ static void thinkpad_acpi_module_exit(void) hwmon_device_unregister(tpacpi_hwmon); if (tp_features.sensors_pdev_attrs_registered) - device_remove_file(&tpacpi_sensors_pdev->dev, - &dev_attr_thinkpad_acpi_pdev_name); + device_remove_file(&tpacpi_sensors_pdev->dev, &dev_attr_name); if (tpacpi_sensors_pdev) platform_device_unregister(tpacpi_sensors_pdev); if (tpacpi_pdev) @@ -9512,8 +9514,7 @@ static int __init thinkpad_acpi_module_init(void) thinkpad_acpi_module_exit(); return ret; } - ret = device_create_file(&tpacpi_sensors_pdev->dev, - &dev_attr_thinkpad_acpi_pdev_name); + ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_name); if (ret) { pr_err("unable to create sysfs hwmon device attributes\n"); thinkpad_acpi_module_exit(); diff --git a/drivers/regulator/da9052-regulator.c b/drivers/regulator/da9052-regulator.c index 8a4df7a1f2ee..e628d4c2f2ae 100644 --- a/drivers/regulator/da9052-regulator.c +++ b/drivers/regulator/da9052-regulator.c @@ -394,6 +394,7 @@ static inline struct da9052_regulator_info *find_regulator_info(u8 chip_id, static int da9052_regulator_probe(struct platform_device *pdev) { + const struct mfd_cell *cell = mfd_get_cell(pdev); struct regulator_config config = { }; struct da9052_regulator *regulator; struct da9052 *da9052; @@ -409,7 +410,7 @@ static int da9052_regulator_probe(struct platform_device *pdev) regulator->da9052 = da9052; regulator->info = find_regulator_info(regulator->da9052->chip_id, - pdev->id); + cell->id); if (regulator->info == NULL) { dev_err(&pdev->dev, "invalid regulator ID specified\n"); return -EINVAL; @@ -419,7 +420,7 @@ static int da9052_regulator_probe(struct platform_device *pdev) config.driver_data = regulator; config.regmap = da9052->regmap; if (pdata && pdata->regulators) { - config.init_data = pdata->regulators[pdev->id]; + config.init_data = pdata->regulators[cell->id]; } else { #ifdef CONFIG_OF struct device_node *nproot = da9052->dev->of_node; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 68c2002e78bf..5c9e680aa375 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1020,8 +1020,7 @@ static void tcm_qla2xxx_depend_tpg(struct work_struct *work) struct se_portal_group *se_tpg = &base_tpg->se_tpg; struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - if (!configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item)) { + if (!target_depend_item(&se_tpg->tpg_group.cg_item)) { atomic_set(&base_tpg->lport_tpg_enabled, 1); qlt_enable_vha(base_vha); } @@ -1037,8 +1036,7 @@ static void tcm_qla2xxx_undepend_tpg(struct work_struct *work) if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) { atomic_set(&base_tpg->lport_tpg_enabled, 0); - configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + target_undepend_item(&se_tpg->tpg_group.cg_item); } complete(&base_tpg->tpg_base_comp); } diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index 15a7ee3859dd..5fe1c22e289b 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c @@ -359,12 +359,13 @@ static void ssb_pcicore_init_hostmode(struct ssb_pcicore *pc) /* * Accessing PCI config without a proper delay after devices reset (not - * GPIO reset) was causing reboots on WRT300N v1.0. + * GPIO reset) was causing reboots on WRT300N v1.0 (BCM4704). * Tested delay 850 us lowered reboot chance to 50-80%, 1000 us fixed it * completely. Flushing all writes was also tested but with no luck. + * The same problem was reported for WRT350N v1 (BCM4705), so we just + * sleep here unconditionally. */ - if (pc->dev->bus->chip_id == 0x4704) - usleep_range(1000, 2000); + usleep_range(1000, 2000); /* Enable PCI bridge BAR0 prefetch and burst */ val = PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 34871a628b11..74e6114ff18f 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -230,7 +230,7 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) * Here we serialize access across the TIQN+TPG Tuple. */ ret = down_interruptible(&tpg->np_login_sem); - if ((ret != 0) || signal_pending(current)) + if (ret != 0) return -1; spin_lock_bh(&tpg->tpg_state_lock); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 8ce94ff744e6..70d799dfab03 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -346,6 +346,7 @@ static int iscsi_login_zero_tsih_s1( if (IS_ERR(sess->se_sess)) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); + kfree(sess->sess_ops); kfree(sess); return -ENOMEM; } diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index e8a240818353..5e3295fe404d 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -161,10 +161,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( int iscsit_get_tpg( struct iscsi_portal_group *tpg) { - int ret; - - ret = mutex_lock_interruptible(&tpg->tpg_access_lock); - return ((ret != 0) || signal_pending(current)) ? -1 : 0; + return mutex_lock_interruptible(&tpg->tpg_access_lock); } void iscsit_put_tpg(struct iscsi_portal_group *tpg) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 75cbde1f7c5b..4f8d4d459aa4 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -704,7 +704,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (!port) @@ -2377,7 +2377,7 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV && + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index ddaf76a4ac2a..e7b0430a0575 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -212,10 +212,6 @@ static struct config_group *target_core_register_fabric( pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); - /* - * Setup tf_ops.tf_subsys pointer for usage with configfs_depend_item() - */ - tf->tf_ops.tf_subsys = tf->tf_subsys; tf->tf_fabric = &tf->tf_group.cg_item; pr_debug("Target_Core_ConfigFS: REGISTER -> Set tf->tf_fabric" " for %s\n", name); @@ -291,10 +287,17 @@ static struct configfs_subsystem target_core_fabrics = { }, }; -struct configfs_subsystem *target_core_subsystem[] = { - &target_core_fabrics, - NULL, -}; +int target_depend_item(struct config_item *item) +{ + return configfs_depend_item(&target_core_fabrics, item); +} +EXPORT_SYMBOL(target_depend_item); + +void target_undepend_item(struct config_item *item) +{ + return configfs_undepend_item(&target_core_fabrics, item); +} +EXPORT_SYMBOL(target_undepend_item); /*############################################################################## // Start functions called by external Target Fabrics Modules @@ -467,7 +470,6 @@ int target_register_template(const struct target_core_fabric_ops *fo) * struct target_fabric_configfs->tf_cit_tmpl */ tf->tf_module = fo->module; - tf->tf_subsys = target_core_subsystem[0]; snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name); tf->tf_ops = *fo; @@ -809,7 +811,7 @@ static ssize_t target_core_dev_pr_show_attr_res_holder(struct se_device *dev, { int ret; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return sprintf(page, "Passthrough\n"); spin_lock(&dev->dev_reservation_lock); @@ -960,7 +962,7 @@ SE_DEV_PR_ATTR_RO(res_pr_type); static ssize_t target_core_dev_pr_show_attr_res_type( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return sprintf(page, "SPC_PASSTHROUGH\n"); else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return sprintf(page, "SPC2_RESERVATIONS\n"); @@ -973,7 +975,7 @@ SE_DEV_PR_ATTR_RO(res_type); static ssize_t target_core_dev_pr_show_attr_res_aptpl_active( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; return sprintf(page, "APTPL Bit Status: %s\n", @@ -988,7 +990,7 @@ SE_DEV_PR_ATTR_RO(res_aptpl_active); static ssize_t target_core_dev_pr_show_attr_res_aptpl_metadata( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; return sprintf(page, "Ready to process PR APTPL metadata..\n"); @@ -1035,7 +1037,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( u16 port_rpti = 0, tpgt = 0; u8 type = 0, scope; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return 0; @@ -2870,7 +2872,7 @@ static int __init target_core_init_configfs(void) { struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL; struct config_group *lu_gp_cg = NULL; - struct configfs_subsystem *subsys; + struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; int ret; @@ -2878,7 +2880,6 @@ static int __init target_core_init_configfs(void) " Engine: %s on %s/%s on "UTS_RELEASE"\n", TARGET_CORE_VERSION, utsname()->sysname, utsname()->machine); - subsys = target_core_subsystem[0]; config_group_init(&subsys->su_group); mutex_init(&subsys->su_mutex); @@ -3008,13 +3009,10 @@ out_global: static void __exit target_core_exit_configfs(void) { - struct configfs_subsystem *subsys; struct config_group *hba_cg, *alua_cg, *lu_gp_cg; struct config_item *item; int i; - subsys = target_core_subsystem[0]; - lu_gp_cg = &alua_lu_gps_group; for (i = 0; lu_gp_cg->default_groups[i]; i++) { item = &lu_gp_cg->default_groups[i]->cg_item; @@ -3045,8 +3043,8 @@ static void __exit target_core_exit_configfs(void) * We expect subsys->su_group.default_groups to be released * by configfs subsystem provider logic.. */ - configfs_unregister_subsystem(subsys); - kfree(subsys->su_group.default_groups); + configfs_unregister_subsystem(&target_core_fabrics); + kfree(target_core_fabrics.su_group.default_groups); core_alua_free_lu_gp(default_lu_gp); default_lu_gp = NULL; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 7faa6aef9a4d..ce5f768181ff 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -33,6 +33,7 @@ #include <linux/kthread.h> #include <linux/in.h> #include <linux/export.h> +#include <asm/unaligned.h> #include <net/sock.h> #include <net/tcp.h> #include <scsi/scsi.h> @@ -527,7 +528,7 @@ static void core_export_port( list_add_tail(&port->sep_list, &dev->dev_sep_list); spin_unlock(&dev->se_port_lock); - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV && + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { tg_pt_gp_mem = core_alua_allocate_tg_pt_gp_mem(port); if (IS_ERR(tg_pt_gp_mem) || !tg_pt_gp_mem) { @@ -1603,7 +1604,7 @@ int target_configure_device(struct se_device *dev) * anything virtual (IBLOCK, FILEIO, RAMDISK), but not for TCM/pSCSI * passthrough because this is being provided by the backend LLD. */ - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV) { + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)) { strncpy(&dev->t10_wwn.vendor[0], "LIO-ORG", 8); strncpy(&dev->t10_wwn.model[0], dev->transport->inquiry_prod, 16); @@ -1707,3 +1708,76 @@ void core_dev_release_virtual_lun0(void) target_free_device(g_lun0_dev); core_delete_hba(hba); } + +/* + * Common CDB parsing for kernel and user passthrough. + */ +sense_reason_t +passthrough_parse_cdb(struct se_cmd *cmd, + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)) +{ + unsigned char *cdb = cmd->t_task_cdb; + + /* + * Clear a lun set in the cdb if the initiator talking to use spoke + * and old standards version, as we can't assume the underlying device + * won't choke up on it. + */ + switch (cdb[0]) { + case READ_10: /* SBC - RDProtect */ + case READ_12: /* SBC - RDProtect */ + case READ_16: /* SBC - RDProtect */ + case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ + case VERIFY: /* SBC - VRProtect */ + case VERIFY_16: /* SBC - VRProtect */ + case WRITE_VERIFY: /* SBC - VRProtect */ + case WRITE_VERIFY_12: /* SBC - VRProtect */ + case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ + break; + default: + cdb[1] &= 0x1f; /* clear logical unit number */ + break; + } + + /* + * For REPORT LUNS we always need to emulate the response, for everything + * else, pass it up. + */ + if (cdb[0] == REPORT_LUNS) { + cmd->execute_cmd = spc_emulate_report_luns; + return TCM_NO_SENSE; + } + + /* Set DATA_CDB flag for ops that should have it */ + switch (cdb[0]) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + case WRITE_VERIFY: + case WRITE_VERIFY_12: + case 0x8e: /* WRITE_VERIFY_16 */ + case COMPARE_AND_WRITE: + case XDWRITEREAD_10: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + case VARIABLE_LENGTH_CMD: + switch (get_unaligned_be16(&cdb[8])) { + case READ_32: + case WRITE_32: + case 0x0c: /* WRITE_VERIFY_32 */ + case XDWRITEREAD_32: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + } + } + + cmd->execute_cmd = exec_cmd; + + return TCM_NO_SENSE; +} +EXPORT_SYMBOL(passthrough_parse_cdb); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index f7e6e51aed36..3f27bfd816d8 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -958,7 +958,6 @@ static struct se_subsystem_api fileio_template = { .inquiry_prod = "FILEIO", .inquiry_rev = FD_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, .attach_hba = fd_attach_hba, .detach_hba = fd_detach_hba, .alloc_device = fd_alloc_device, diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 1b7947c2510f..8c965683789f 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -904,7 +904,6 @@ static struct se_subsystem_api iblock_template = { .inquiry_prod = "IBLOCK", .inquiry_rev = IBLOCK_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, .attach_hba = iblock_attach_hba, .detach_hba = iblock_detach_hba, .alloc_device = iblock_alloc_device, diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 874a9bc988d8..68bd7f5d9f73 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -4,9 +4,6 @@ /* target_core_alua.c */ extern struct t10_alua_lu_gp *default_lu_gp; -/* target_core_configfs.c */ -extern struct configfs_subsystem *target_core_subsystem[]; - /* target_core_device.c */ extern struct mutex g_device_mutex; extern struct list_head g_device_list; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index c1aa9655e96e..a15411c79ae9 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1367,41 +1367,26 @@ void core_scsi3_free_all_registrations( static int core_scsi3_tpg_depend_item(struct se_portal_group *tpg) { - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &tpg->tpg_group.cg_item); + return target_depend_item(&tpg->tpg_group.cg_item); } static void core_scsi3_tpg_undepend_item(struct se_portal_group *tpg) { - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &tpg->tpg_group.cg_item); - + target_undepend_item(&tpg->tpg_group.cg_item); atomic_dec_mb(&tpg->tpg_pr_ref_count); } static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl) { - struct se_portal_group *tpg = nacl->se_tpg; - if (nacl->dynamic_node_acl) return 0; - - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &nacl->acl_group.cg_item); + return target_depend_item(&nacl->acl_group.cg_item); } static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl) { - struct se_portal_group *tpg = nacl->se_tpg; - - if (nacl->dynamic_node_acl) { - atomic_dec_mb(&nacl->acl_pr_ref_count); - return; - } - - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &nacl->acl_group.cg_item); - + if (!nacl->dynamic_node_acl) + target_undepend_item(&nacl->acl_group.cg_item); atomic_dec_mb(&nacl->acl_pr_ref_count); } @@ -1419,8 +1404,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &lun_acl->se_lun_group.cg_item); + return target_depend_item(&lun_acl->se_lun_group.cg_item); } static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) @@ -1438,9 +1422,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &lun_acl->se_lun_group.cg_item); - + target_undepend_item(&lun_acl->se_lun_group.cg_item); atomic_dec_mb(&se_deve->pr_ref_count); } @@ -4111,7 +4093,7 @@ target_check_reservation(struct se_cmd *cmd) return 0; if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; spin_lock(&dev->dev_reservation_lock); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index f6c954c4635f..ecc5eaef13d6 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -521,6 +521,7 @@ static int pscsi_configure_device(struct se_device *dev) " pdv_host_id: %d\n", pdv->pdv_host_id); return -EINVAL; } + pdv->pdv_lld_host = sh; } } else { if (phv->phv_mode == PHV_VIRTUAL_HOST_ID) { @@ -603,6 +604,8 @@ static void pscsi_free_device(struct se_device *dev) if ((phv->phv_mode == PHV_LLD_SCSI_HOST_NO) && (phv->phv_lld_host != NULL)) scsi_host_put(phv->phv_lld_host); + else if (pdv->pdv_lld_host) + scsi_host_put(pdv->pdv_lld_host); if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM)) scsi_device_put(sd); @@ -970,64 +973,13 @@ fail: return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } -/* - * Clear a lun set in the cdb if the initiator talking to use spoke - * and old standards version, as we can't assume the underlying device - * won't choke up on it. - */ -static inline void pscsi_clear_cdb_lun(unsigned char *cdb) -{ - switch (cdb[0]) { - case READ_10: /* SBC - RDProtect */ - case READ_12: /* SBC - RDProtect */ - case READ_16: /* SBC - RDProtect */ - case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ - case VERIFY: /* SBC - VRProtect */ - case VERIFY_16: /* SBC - VRProtect */ - case WRITE_VERIFY: /* SBC - VRProtect */ - case WRITE_VERIFY_12: /* SBC - VRProtect */ - case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ - break; - default: - cdb[1] &= 0x1f; /* clear logical unit number */ - break; - } -} - static sense_reason_t pscsi_parse_cdb(struct se_cmd *cmd) { - unsigned char *cdb = cmd->t_task_cdb; - if (cmd->se_cmd_flags & SCF_BIDI) return TCM_UNSUPPORTED_SCSI_OPCODE; - pscsi_clear_cdb_lun(cdb); - - /* - * For REPORT LUNS we always need to emulate the response, for everything - * else the default for pSCSI is to pass the command to the underlying - * LLD / physical hardware. - */ - switch (cdb[0]) { - case REPORT_LUNS: - cmd->execute_cmd = spc_emulate_report_luns; - return 0; - case READ_6: - case READ_10: - case READ_12: - case READ_16: - case WRITE_6: - case WRITE_10: - case WRITE_12: - case WRITE_16: - case WRITE_VERIFY: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - /* FALLTHROUGH*/ - default: - cmd->execute_cmd = pscsi_execute_cmd; - return 0; - } + return passthrough_parse_cdb(cmd, pscsi_execute_cmd); } static sense_reason_t @@ -1189,7 +1141,7 @@ static struct configfs_attribute *pscsi_backend_dev_attrs[] = { static struct se_subsystem_api pscsi_template = { .name = "pscsi", .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_PHBA_PDEV, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h index 1bd757dff8ee..820d3052b775 100644 --- a/drivers/target/target_core_pscsi.h +++ b/drivers/target/target_core_pscsi.h @@ -45,6 +45,7 @@ struct pscsi_dev_virt { int pdv_lun_id; struct block_device *pdv_bd; struct scsi_device *pdv_sd; + struct Scsi_Host *pdv_lld_host; } ____cacheline_aligned; typedef enum phv_modes { diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index a263bf5fab8d..d16489b6a1a4 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -733,7 +733,6 @@ static struct se_subsystem_api rd_mcp_template = { .name = "rd_mcp", .inquiry_prod = "RAMDISK-MCP", .inquiry_rev = RD_MCP_VERSION, - .transport_type = TRANSPORT_PLUGIN_VHBA_VDEV, .attach_hba = rd_attach_hba, .detach_hba = rd_detach_hba, .alloc_device = rd_alloc_device, diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 8855781ac653..733824e3825f 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -568,7 +568,7 @@ sbc_compare_and_write(struct se_cmd *cmd) * comparision using SGLs at cmd->t_bidi_data_sg.. */ rc = down_interruptible(&dev->caw_sem); - if ((rc != 0) || signal_pending(current)) { + if (rc != 0) { cmd->transport_complete_callback = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 3fe5cb240b6f..675f2d9d1f14 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1196,7 +1196,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd) * Check if SAM Task Attribute emulation is enabled for this * struct se_device storage object */ - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (cmd->sam_task_attr == TCM_ACA_TAG) { @@ -1770,7 +1770,7 @@ static int target_write_prot_action(struct se_cmd *cmd) sectors, 0, NULL, 0); if (unlikely(cmd->pi_err)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); spin_unlock_irq(&cmd->t_state_lock); transport_generic_request_failure(cmd, cmd->pi_err); return -1; @@ -1787,7 +1787,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return false; /* @@ -1868,7 +1868,7 @@ void target_execute_cmd(struct se_cmd *cmd) if (target_handle_task_attr(cmd)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state &= ~(CMD_T_BUSY | CMD_T_SENT); spin_unlock_irq(&cmd->t_state_lock); return; } @@ -1912,7 +1912,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return; if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { @@ -1957,8 +1957,7 @@ static void transport_complete_qf(struct se_cmd *cmd) case DMA_TO_DEVICE: if (cmd->se_cmd_flags & SCF_BIDI) { ret = cmd->se_tfo->queue_data_in(cmd); - if (ret < 0) - break; + break; } /* Fall through for DMA_TO_DEVICE */ case DMA_NONE: diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index dbc872a6c981..07d2996d8c1f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -71,13 +71,6 @@ struct tcmu_hba { u32 host_id; }; -/* User wants all cmds or just some */ -enum passthru_level { - TCMU_PASS_ALL = 0, - TCMU_PASS_IO, - TCMU_PASS_INVALID, -}; - #define TCMU_CONFIG_LEN 256 struct tcmu_dev { @@ -89,7 +82,6 @@ struct tcmu_dev { #define TCMU_DEV_BIT_OPEN 0 #define TCMU_DEV_BIT_BROKEN 1 unsigned long flags; - enum passthru_level pass_level; struct uio_info uio_info; @@ -683,8 +675,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) setup_timer(&udev->timeout, tcmu_device_timedout, (unsigned long)udev); - udev->pass_level = TCMU_PASS_ALL; - return &udev->se_dev; } @@ -948,13 +938,13 @@ static void tcmu_free_device(struct se_device *dev) } enum { - Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level, + Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err, }; static match_table_t tokens = { {Opt_dev_config, "dev_config=%s"}, {Opt_dev_size, "dev_size=%u"}, - {Opt_pass_level, "pass_level=%u"}, + {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_err, NULL} }; @@ -965,7 +955,7 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, char *orig, *ptr, *opts, *arg_p; substring_t args[MAX_OPT_ARGS]; int ret = 0, token; - int arg; + unsigned long tmp_ul; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -998,15 +988,23 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; - case Opt_pass_level: - match_int(args, &arg); - if (arg >= TCMU_PASS_INVALID) { - pr_warn("TCMU: Invalid pass_level: %d\n", arg); + case Opt_hw_block_size: + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; break; } - - pr_debug("TCMU: Setting pass_level to %d\n", arg); - udev->pass_level = arg; + ret = kstrtoul(arg_p, 0, &tmp_ul); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoul() failed for hw_block_size=\n"); + break; + } + if (!tmp_ul) { + pr_err("hw_block_size must be nonzero\n"); + break; + } + dev->dev_attrib.hw_block_size = tmp_ul; break; default: break; @@ -1024,8 +1022,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu PassLevel: %u\n", - udev->dev_size, udev->pass_level); + bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); return bl; } @@ -1039,20 +1036,6 @@ static sector_t tcmu_get_blocks(struct se_device *dev) } static sense_reason_t -tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents, - enum dma_data_direction data_direction) -{ - int ret; - - ret = tcmu_queue_cmd(se_cmd); - - if (ret != 0) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - else - return TCM_NO_SENSE; -} - -static sense_reason_t tcmu_pass_op(struct se_cmd *se_cmd) { int ret = tcmu_queue_cmd(se_cmd); @@ -1063,91 +1046,29 @@ tcmu_pass_op(struct se_cmd *se_cmd) return TCM_NO_SENSE; } -static struct sbc_ops tcmu_sbc_ops = { - .execute_rw = tcmu_execute_rw, - .execute_sync_cache = tcmu_pass_op, - .execute_write_same = tcmu_pass_op, - .execute_write_same_unmap = tcmu_pass_op, - .execute_unmap = tcmu_pass_op, -}; - static sense_reason_t tcmu_parse_cdb(struct se_cmd *cmd) { - unsigned char *cdb = cmd->t_task_cdb; - struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev); - sense_reason_t ret; - - switch (udev->pass_level) { - case TCMU_PASS_ALL: - /* We're just like pscsi, then */ - /* - * For REPORT LUNS we always need to emulate the response, for everything - * else, pass it up. - */ - switch (cdb[0]) { - case REPORT_LUNS: - cmd->execute_cmd = spc_emulate_report_luns; - break; - case READ_6: - case READ_10: - case READ_12: - case READ_16: - case WRITE_6: - case WRITE_10: - case WRITE_12: - case WRITE_16: - case WRITE_VERIFY: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - /* FALLTHROUGH */ - default: - cmd->execute_cmd = tcmu_pass_op; - } - ret = TCM_NO_SENSE; - break; - case TCMU_PASS_IO: - ret = sbc_parse_cdb(cmd, &tcmu_sbc_ops); - break; - default: - pr_err("Unknown tcm-user pass level %d\n", udev->pass_level); - ret = TCM_CHECK_CONDITION_ABORT_CMD; - } - - return ret; + return passthrough_parse_cdb(cmd, tcmu_pass_op); } -DEF_TB_DEFAULT_ATTRIBS(tcmu); +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type); +TB_DEV_ATTR_RO(tcmu, hw_pi_prot_type); + +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_block_size); +TB_DEV_ATTR_RO(tcmu, hw_block_size); + +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_max_sectors); +TB_DEV_ATTR_RO(tcmu, hw_max_sectors); + +DEF_TB_DEV_ATTRIB_RO(tcmu, hw_queue_depth); +TB_DEV_ATTR_RO(tcmu, hw_queue_depth); static struct configfs_attribute *tcmu_backend_dev_attrs[] = { - &tcmu_dev_attrib_emulate_model_alias.attr, - &tcmu_dev_attrib_emulate_dpo.attr, - &tcmu_dev_attrib_emulate_fua_write.attr, - &tcmu_dev_attrib_emulate_fua_read.attr, - &tcmu_dev_attrib_emulate_write_cache.attr, - &tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr, - &tcmu_dev_attrib_emulate_tas.attr, - &tcmu_dev_attrib_emulate_tpu.attr, - &tcmu_dev_attrib_emulate_tpws.attr, - &tcmu_dev_attrib_emulate_caw.attr, - &tcmu_dev_attrib_emulate_3pc.attr, - &tcmu_dev_attrib_pi_prot_type.attr, &tcmu_dev_attrib_hw_pi_prot_type.attr, - &tcmu_dev_attrib_pi_prot_format.attr, - &tcmu_dev_attrib_enforce_pr_isids.attr, - &tcmu_dev_attrib_is_nonrot.attr, - &tcmu_dev_attrib_emulate_rest_reord.attr, - &tcmu_dev_attrib_force_pr_aptpl.attr, &tcmu_dev_attrib_hw_block_size.attr, - &tcmu_dev_attrib_block_size.attr, &tcmu_dev_attrib_hw_max_sectors.attr, - &tcmu_dev_attrib_optimal_sectors.attr, &tcmu_dev_attrib_hw_queue_depth.attr, - &tcmu_dev_attrib_queue_depth.attr, - &tcmu_dev_attrib_max_unmap_lba_count.attr, - &tcmu_dev_attrib_max_unmap_block_desc_count.attr, - &tcmu_dev_attrib_unmap_granularity.attr, - &tcmu_dev_attrib_unmap_granularity_alignment.attr, - &tcmu_dev_attrib_max_write_same_len.attr, NULL, }; @@ -1156,7 +1077,7 @@ static struct se_subsystem_api tcmu_template = { .inquiry_prod = "USER", .inquiry_rev = TCMU_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, .attach_hba = tcmu_attach_hba, .detach_hba = tcmu_detach_hba, .alloc_device = tcmu_alloc_device, diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index a600ff15dcfd..8fd680ac941b 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -58,7 +58,6 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op bool src) { struct se_device *se_dev; - struct configfs_subsystem *subsys = target_core_subsystem[0]; unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; int rc; @@ -90,8 +89,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op " se_dev\n", xop->src_dev); } - rc = configfs_depend_item(subsys, - &se_dev->dev_group.cg_item); + rc = target_depend_item(&se_dev->dev_group.cg_item); if (rc != 0) { pr_err("configfs_depend_item attempt failed:" " %d for se_dev: %p\n", rc, se_dev); @@ -99,8 +97,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op return rc; } - pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p" - " se_dev->se_dev_group: %p\n", subsys, se_dev, + pr_debug("Called configfs_depend_item for se_dev: %p" + " se_dev->se_dev_group: %p\n", se_dev, &se_dev->dev_group); mutex_unlock(&g_device_mutex); @@ -373,7 +371,6 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) { - struct configfs_subsystem *subsys = target_core_subsystem[0]; struct se_device *remote_dev; if (xop->op_origin == XCOL_SOURCE_RECV_OP) @@ -381,11 +378,11 @@ static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) else remote_dev = xop->src_dev; - pr_debug("Calling configfs_undepend_item for subsys: %p" + pr_debug("Calling configfs_undepend_item for" " remote_dev: %p remote_dev->dev_group: %p\n", - subsys, remote_dev, &remote_dev->dev_group.cg_item); + remote_dev, &remote_dev->dev_group.cg_item); - configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item); + target_undepend_item(&remote_dev->dev_group.cg_item); } static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/tty/mips_ejtag_fdc.c b/drivers/tty/mips_ejtag_fdc.c index 04d9e23d1ee1..358323c83b4f 100644 --- a/drivers/tty/mips_ejtag_fdc.c +++ b/drivers/tty/mips_ejtag_fdc.c @@ -174,13 +174,13 @@ struct mips_ejtag_fdc_tty { static inline void mips_ejtag_fdc_write(struct mips_ejtag_fdc_tty *priv, unsigned int offs, unsigned int data) { - iowrite32(data, priv->reg + offs); + __raw_writel(data, priv->reg + offs); } static inline unsigned int mips_ejtag_fdc_read(struct mips_ejtag_fdc_tty *priv, unsigned int offs) { - return ioread32(priv->reg + offs); + return __raw_readl(priv->reg + offs); } /* Encoding of byte stream in FDC words */ @@ -347,9 +347,9 @@ static void mips_ejtag_fdc_console_write(struct console *c, const char *s, s += inc[word.bytes - 1]; /* Busy wait until there's space in fifo */ - while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF) + while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF) ; - iowrite32(word.word, regs + REG_FDTX(c->index)); + __raw_writel(word.word, regs + REG_FDTX(c->index)); } out: local_irq_restore(flags); @@ -1227,7 +1227,7 @@ static int kgdbfdc_read_char(void) /* Read next word from KGDB channel */ do { - stat = ioread32(regs + REG_FDSTAT); + stat = __raw_readl(regs + REG_FDSTAT); /* No data waiting? */ if (stat & REG_FDSTAT_RXE) @@ -1236,7 +1236,7 @@ static int kgdbfdc_read_char(void) /* Read next word */ channel = (stat & REG_FDSTAT_RXCHAN) >> REG_FDSTAT_RXCHAN_SHIFT; - data = ioread32(regs + REG_FDRX); + data = __raw_readl(regs + REG_FDRX); } while (channel != CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN); /* Decode into rbuf */ @@ -1266,9 +1266,10 @@ static void kgdbfdc_push_one(void) return; /* Busy wait until there's space in fifo */ - while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF) + while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF) ; - iowrite32(word.word, regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN)); + __raw_writel(word.word, + regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN)); } /* flush the whole write buffer to the TX FIFO */ diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 5e19bb53b3a9..ea32b386797f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1409,8 +1409,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, * dependency now. */ se_tpg = &tpg->se_tpg; - ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + ret = target_depend_item(&se_tpg->tpg_group.cg_item); if (ret) { pr_warn("configfs_depend_item() failed: %d\n", ret); kfree(vs_tpg); @@ -1513,8 +1512,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. */ se_tpg = &tpg->se_tpg; - configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + target_undepend_item(&se_tpg->tpg_group.cg_item); } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 3a145a643e0d..6897f1c1bc73 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -274,6 +274,10 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->pwm = devm_pwm_get(&pdev->dev, NULL); if (IS_ERR(pb->pwm)) { + ret = PTR_ERR(pb->pwm); + if (ret == -EPROBE_DEFER) + goto err_alloc; + dev_err(&pdev->dev, "unable to request PWM, trying legacy API\n"); pb->legacy = true; pb->pwm = pwm_request(data->pwm_id, "pwm-backlight"); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 241ef68d2893..cd46e4158830 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -918,7 +918,7 @@ static int load_elf_binary(struct linux_binprm *bprm) total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum); if (!total_size) { - error = -EINVAL; + retval = -EINVAL; goto out_free_dentry; } } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 430e0348c99e..7dc886c9a78f 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -24,6 +24,7 @@ #include "cifsfs.h" #include "dns_resolve.h" #include "cifs_debug.h" +#include "cifs_unicode.h" static LIST_HEAD(cifs_dfs_automount_list); @@ -312,7 +313,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) xid = get_xid(); rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); free_xid(xid); cifs_put_tlink(tlink); diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 0303c6793d90..5a53ac6b1e02 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -27,41 +27,6 @@ #include "cifsglob.h" #include "cifs_debug.h" -/* - * cifs_utf16_bytes - how long will a string be after conversion? - * @utf16 - pointer to input string - * @maxbytes - don't go past this many bytes of input string - * @codepage - destination codepage - * - * Walk a utf16le string and return the number of bytes that the string will - * be after being converted to the given charset, not including any null - * termination required. Don't walk past maxbytes in the source buffer. - */ -int -cifs_utf16_bytes(const __le16 *from, int maxbytes, - const struct nls_table *codepage) -{ - int i; - int charlen, outlen = 0; - int maxwords = maxbytes / 2; - char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; - - for (i = 0; i < maxwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) - break; - - charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); - if (charlen > 0) - outlen += charlen; - else - outlen++; - } - - return outlen; -} - int cifs_remap(struct cifs_sb_info *cifs_sb) { int map_type; @@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target) * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). */ static int -cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, +cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, int maptype) { int len = 1; + __u16 src_char; + + src_char = *from; if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) return len; @@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, /* if character not one of seven in special remap set */ len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); - if (len <= 0) { - *target = '?'; - len = 1; - } + if (len <= 0) + goto surrogate_pair; + + return len; + +surrogate_pair: + /* convert SURROGATE_PAIR and IVS */ + if (strcmp(cp->charset, "utf8")) + goto unknown; + len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); + if (len <= 0) + goto unknown; + return len; + +unknown: + *target = '?'; + len = 1; return len; } @@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, int nullsize = nls_nullsize(codepage); int fromwords = fromlen / 2; char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; + __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ /* * because the chars can be of varying widths, we need to take care @@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); for (i = 0; i < fromwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) break; + if (i + 1 < fromwords) + ftmp[1] = get_unaligned_le16(&from[i + 1]); + else + ftmp[1] = 0; + if (i + 2 < fromwords) + ftmp[2] = get_unaligned_le16(&from[i + 2]); + else + ftmp[2] = 0; /* * check to see if converting this character might make the @@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, /* put converted char into 'to' buffer */ charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); outlen += charlen; + + /* charlen (=bytes of UTF-8 for 1 character) + * 4bytes UTF-8(surrogate pair) is charlen=4 + * (4bytes UTF-16 code) + * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 + * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ + if (charlen == 4) + i++; + else if (charlen >= 5) + /* 5-6bytes UTF-8 */ + i += 2; } /* properly null-terminate string */ @@ -296,6 +296,46 @@ success: } /* + * cifs_utf16_bytes - how long will a string be after conversion? + * @utf16 - pointer to input string + * @maxbytes - don't go past this many bytes of input string + * @codepage - destination codepage + * + * Walk a utf16le string and return the number of bytes that the string will + * be after being converted to the given charset, not including any null + * termination required. Don't walk past maxbytes in the source buffer. + */ +int +cifs_utf16_bytes(const __le16 *from, int maxbytes, + const struct nls_table *codepage) +{ + int i; + int charlen, outlen = 0; + int maxwords = maxbytes / 2; + char tmp[NLS_MAX_CHARSET_SIZE]; + __u16 ftmp[3]; + + for (i = 0; i < maxwords; i++) { + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) + break; + if (i + 1 < maxwords) + ftmp[1] = get_unaligned_le16(&from[i + 1]); + else + ftmp[1] = 0; + if (i + 2 < maxwords) + ftmp[2] = get_unaligned_le16(&from[i + 2]); + else + ftmp[2] = 0; + + charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); + outlen += charlen; + } + + return outlen; +} + +/* * cifs_strndup_from_utf16 - copy a string from wire format to the local * codepage * @src - source string @@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, char src_char; __le16 dst_char; wchar_t tmp; + wchar_t *wchar_to; /* UTF-16 */ + int ret; + unicode_t u; if (map_chars == NO_MAP_UNI_RSVD) return cifs_strtoUTF16(target, source, PATH_MAX, cp); + wchar_to = kzalloc(6, GFP_KERNEL); + for (i = 0; i < srclen; j++) { src_char = source[i]; charlen = 1; @@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, * if no match, use question mark, which at least in * some cases serves as wild card */ - if (charlen < 1) { - dst_char = cpu_to_le16(0x003f); - charlen = 1; + if (charlen > 0) + goto ctoUTF16; + + /* convert SURROGATE_PAIR */ + if (strcmp(cp->charset, "utf8") || !wchar_to) + goto unknown; + if (*(source + i) & 0x80) { + charlen = utf8_to_utf32(source + i, 6, &u); + if (charlen < 0) + goto unknown; + } else + goto unknown; + ret = utf8s_to_utf16s(source + i, charlen, + UTF16_LITTLE_ENDIAN, + wchar_to, 6); + if (ret < 0) + goto unknown; + + i += charlen; + dst_char = cpu_to_le16(*wchar_to); + if (charlen <= 3) + /* 1-3bytes UTF-8 to 2bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + else if (charlen == 4) { + /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 + * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 + * (charlen=3+4 or 4+4) */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + } else if (charlen >= 5) { + /* 5-6bytes UTF-8 to 6bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 2)); + j++; + put_unaligned(dst_char, &target[j]); } + continue; + +unknown: + dst_char = cpu_to_le16(0x003f); + charlen = 1; } + +ctoUTF16: /* * character may take more than one byte in the source string, * but will take exactly two bytes in the target string @@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, ctoUTF16_out: put_unaligned(0, &target[j]); /* Null terminate target unicode string */ + kfree(wchar_to); return j; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f5089bde3635..0a9fb6b53126 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nouser_xattr"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) seq_puts(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + seq_puts(s, ",mapposix"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) seq_puts(s, ",sfu"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c31ce98c1704..c63fd1dde25b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -361,11 +361,11 @@ extern int CIFSUnixCreateHardLink(const unsigned int xid, extern int CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, - const struct nls_table *nls_codepage); + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, - const struct nls_table *nls_codepage); + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, char **symlinkinfo, const struct nls_table *nls_codepage); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 84650a51c7c4..f26ffbfc64d8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2784,7 +2784,7 @@ copyRetry: int CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, - const struct nls_table *nls_codepage) + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -2804,9 +2804,9 @@ createSymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName, - /* find define for this maxpathcomponent */ - PATH_MAX, nls_codepage); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fromName, + /* find define for this maxpathcomponent */ + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -2828,9 +2828,9 @@ createSymLinkRetry: data_offset = (char *) (&pSMB->hdr.Protocol) + offset; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len_target = - cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX - /* find define for this maxpathcomponent */ - , nls_codepage); + cifsConvertToUTF16((__le16 *) data_offset, toName, + /* find define for this maxpathcomponent */ + PATH_MAX, nls_codepage, remap); name_len_target++; /* trailing null */ name_len_target *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -3034,7 +3034,7 @@ winCreateHardLinkRetry: int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **symlinkinfo, - const struct nls_table *nls_codepage) + const struct nls_table *nls_codepage, int remap) { /* SMB_QUERY_FILE_UNIX_LINK */ TRANSACTION2_QPI_REQ *pSMB = NULL; @@ -3055,8 +3055,9 @@ querySymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage); + cifsConvertToUTF16((__le16 *) pSMB->FileName, + searchName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -4917,7 +4918,7 @@ getDFSRetry: strncpy(pSMB->RequestFileName, search_name, name_len); } - if (ses->server && ses->server->sign) + if (ses->server->sign) pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; pSMB->hdr.Uid = ses->Suid; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f3bfe08e177b..8383d5ea4202 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -386,6 +386,7 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); + mutex_unlock(&server->srv_mutex); msleep(3000); } else { atomic_inc(&tcpSesReconnectCount); @@ -393,8 +394,8 @@ cifs_reconnect(struct TCP_Server_Info *server) if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); } - mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 338d56936f6a..c3eb998a99bd 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -620,8 +620,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, } rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc) goto mknod_out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cafbf10521d5..3f50cee79df9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -140,8 +140,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, posix_flags = cifs_posix_convert_flags(f_flags); rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, poplock, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (rc) @@ -1553,8 +1552,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, rc = server->ops->mand_unlock_range(cfile, flock, xid); out: - if (flock->fl_flags & FL_POSIX) - posix_lock_file_wait(file, flock); + if (flock->fl_flags & FL_POSIX && !rc) + rc = posix_lock_file_wait(file, flock); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 55b58112d122..f621b44cb800 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -373,8 +373,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* could have done a find first instead but this returns more info */ rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (!rc) { @@ -402,9 +401,25 @@ int cifs_get_inode_info_unix(struct inode **pinode, rc = -ENOMEM; } else { /* we already have inode, update it */ + + /* if uniqueid is different, return error */ + if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM && + CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) { + rc = -ESTALE; + goto cgiiu_exit; + } + + /* if filetype is different, return error */ + if (unlikely(((*pinode)->i_mode & S_IFMT) != + (fattr.cf_mode & S_IFMT))) { + rc = -ESTALE; + goto cgiiu_exit; + } + cifs_fattr_to_inode(*pinode, &fattr); } +cgiiu_exit: return rc; } @@ -839,6 +854,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, if (!*inode) rc = -ENOMEM; } else { + /* we already have inode, update it */ + + /* if filetype is different, return error */ + if (unlikely(((*inode)->i_mode & S_IFMT) != + (fattr.cf_mode & S_IFMT))) { + rc = -ESTALE; + goto cgii_exit; + } + cifs_fattr_to_inode(*inode, &fattr); } @@ -2215,8 +2239,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) pTcon = tlink_tcon(tlink); rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 252e672d5604..e6c707cc62b3 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -717,7 +717,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname); else if (pTcon->unix_ext) rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, - cifs_sb->local_nls); + cifs_sb->local_nls, + cifs_remap(cifs_sb)); /* else rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName, cifs_sb_target->local_nls); */ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b4a47237486b..b1eede3678a9 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -90,6 +90,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, if (dentry) { inode = d_inode(dentry); if (inode) { + if (d_mountpoint(dentry)) + goto out; /* * If we're generating inode numbers, then we don't * want to clobber the existing one with the one that diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 7bfdd6066276..fc537c29044e 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -960,7 +960,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, /* Check for unix extensions */ if (cap_unix(tcon->ses)) { rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, - cifs_sb->local_nls); + cifs_sb->local_nls, + cifs_remap(cifs_sb)); if (rc == -EREMOTE) rc = cifs_unix_dfs_readlink(xid, tcon, full_path, target_path, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 65cd7a84c8bc..54cbe19d9c08 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -110,7 +110,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ - if ((tcon->ses) && + if ((tcon->ses) && (tcon->ses->server) && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) hdr->CreditCharge = cpu_to_le16(1); /* else CreditCharge MBZ */ diff --git a/fs/dcache.c b/fs/dcache.c index 656ce522a218..37b5afdaf698 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1239,13 +1239,13 @@ ascend: /* might go back up the wrong parent if we have had a rename. */ if (need_seqretry(&rename_lock, seq)) goto rename_retry; - next = child->d_child.next; - while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + /* go into the first sibling still alive */ + do { + next = child->d_child.next; if (next == &this_parent->d_subdirs) goto ascend; child = list_entry(next, struct dentry, d_child); - next = next->next; - } + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); rcu_read_unlock(); goto resume; } diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index 082234581d05..83f4e76511c2 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c @@ -159,7 +159,7 @@ int omfs_allocate_range(struct super_block *sb, goto out; found: - *return_block = i * bits_per_entry + bit; + *return_block = (u64) i * bits_per_entry + bit; *return_size = run; ret = set_run(sb, i, bits_per_entry, bit, run, 1); diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 138321b0c6c2..3d935c81789a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -306,7 +306,8 @@ static const struct super_operations omfs_sops = { */ static int omfs_get_imap(struct super_block *sb) { - unsigned int bitmap_size, count, array_size; + unsigned int bitmap_size, array_size; + int count; struct omfs_sb_info *sbi = OMFS_SB(sb); struct buffer_head *bh; unsigned long **ptr; @@ -359,7 +360,7 @@ nomem: } enum { - Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask + Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err }; static const match_table_t tokens = { @@ -368,6 +369,7 @@ static const match_table_t tokens = { {Opt_umask, "umask=%o"}, {Opt_dmask, "dmask=%o"}, {Opt_fmask, "fmask=%o"}, + {Opt_err, NULL}, }; static int parse_options(char *options, struct omfs_sb_info *sbi) @@ -548,8 +550,10 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) } sb->s_root = d_make_root(root); - if (!sb->s_root) + if (!sb->s_root) { + ret = -ENOMEM; goto out_brelse_bh2; + } printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); ret = 0; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 24f640441bd9..84d693d37428 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -299,6 +299,9 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct cred *override_cred; char *link = NULL; + if (WARN_ON(!workdir)) + return -EROFS; + ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index d139405d2bfa..692ceda3bc21 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -222,6 +222,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, struct kstat stat; int err; + if (WARN_ON(!workdir)) + return ERR_PTR(-EROFS); + err = ovl_lock_rename_workdir(workdir, upperdir); if (err) goto out; @@ -322,6 +325,9 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct dentry *newdentry; int err; + if (WARN_ON(!workdir)) + return -EROFS; + err = ovl_lock_rename_workdir(workdir, upperdir); if (err) goto out; @@ -506,11 +512,28 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; - if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { - opaquedir = ovl_check_empty_and_clear(dentry); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) - goto out; + if (WARN_ON(!workdir)) + return -EROFS; + + if (is_dir) { + if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { + opaquedir = ovl_check_empty_and_clear(dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) + goto out; + } else { + LIST_HEAD(list); + + /* + * When removing an empty opaque directory, then it + * makes no sense to replace it with an exact replica of + * itself. But emptiness still needs to be checked. + */ + err = ovl_check_empty_dir(dentry, &list); + ovl_cache_free(&list); + if (err) + goto out; + } } err = ovl_lock_rename_workdir(workdir, upperdir); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 5f0d1993e6e3..bf8537c7f455 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -529,7 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ufs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) + if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir)) return -EROFS; return 0; @@ -925,9 +925,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); err = PTR_ERR(ufs->workdir); if (IS_ERR(ufs->workdir)) { - pr_err("overlayfs: failed to create directory %s/%s\n", - ufs->config.workdir, OVL_WORKDIR_NAME); - goto out_put_upper_mnt; + pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", + ufs->config.workdir, OVL_WORKDIR_NAME, -err); + sb->s_flags |= MS_RDONLY; + ufs->workdir = NULL; } } @@ -997,7 +998,6 @@ out_put_lower_mnt: kfree(ufs->lower_mnt); out_put_workdir: dput(ufs->workdir); -out_put_upper_mnt: mntput(ufs->upper_mnt); out_put_lowerpath: for (i = 0; i < numlower; i++) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 04e79d57bca6..e9d401ce93bb 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) * After the last attribute is removed revert to original inode format, * making all literal area available to the data fork once more. */ -STATIC void -xfs_attr_fork_reset( +void +xfs_attr_fork_remove( struct xfs_inode *ip, struct xfs_trans *tp) { @@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) (mp->m_flags & XFS_MOUNT_ATTR2) && (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && !(args->op_flags & XFS_DA_OP_ADDNAME)) { - xfs_attr_fork_reset(dp, args->trans); + xfs_attr_fork_remove(dp, args->trans); } else { xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); @@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform( if (forkoff == -1) { ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); - xfs_attr_fork_reset(dp, args->trans); + xfs_attr_fork_remove(dp, args->trans); goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 025c4b820c03..882c8d338891 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -53,7 +53,7 @@ int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_list(struct xfs_attr_list_context *context); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); - +void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* * Internal routines when attribute fork size == XFS_LBSIZE(mp). diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index aeffeaaac0ec..f1026e86dabc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align( align_alen += temp; align_off -= temp; } + + /* Same adjustment for the end of the requested area. */ + temp = (align_alen % extsz); + if (temp) + align_alen += extsz - temp; + /* - * Same adjustment for the end of the requested area. + * For large extent hint sizes, the aligned extent might be larger than + * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls + * the length back under MAXEXTLEN. The outer allocation loops handle + * short allocation just fine, so it is safe to do this. We only want to + * do it when we are forced to, though, because it means more allocation + * operations are required. */ - if ((temp = (align_alen % extsz))) { - align_alen += extsz - temp; - } + while (align_alen > MAXEXTLEN) + align_alen -= extsz; + ASSERT(align_alen <= MAXEXTLEN); + /* * If the previous block overlaps with this proposed allocation * then move the start forward without adjusting the length. @@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align( return -EINVAL; } else { ASSERT(orig_off >= align_off); - ASSERT(orig_end <= align_off + align_alen); + /* see MAXEXTLEN handling above */ + ASSERT(orig_end <= align_off + align_alen || + align_alen + extsz > MAXEXTLEN); } #ifdef DEBUG @@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc( /* Figure out the extent size, adjust alen */ extsz = xfs_get_extsz_hint(ip); if (extsz) { - /* - * Make sure we don't exceed a single extent length when we - * align the extent by reducing length we are going to - * allocate by the maximum amount extent size aligment may - * require. - */ - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, 1, 0, &aoff, &alen); ASSERT(!error); diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 07349a183a11..1c9e75521250 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc( */ newlen = args.mp->m_ialloc_inos; if (args.mp->m_maxicount && - percpu_counter_read(&args.mp->m_icount) + newlen > + percpu_counter_read_positive(&args.mp->m_icount) + newlen > args.mp->m_maxicount) return -ENOSPC; args.minlen = args.maxlen = args.mp->m_ialloc_blks; @@ -1339,10 +1339,13 @@ xfs_dialloc( * If we have already hit the ceiling of inode blocks then clear * okalloc so we scan all available agi structures for a free * inode. + * + * Read rough value of mp->m_icount by percpu_counter_read_positive, + * which will sacrifice the preciseness but improve the performance. */ if (mp->m_maxicount && - percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos > - mp->m_maxicount) { + percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos + > mp->m_maxicount) { noroom = 1; okalloc = 0; } diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index f9c1c64782d3..3fbf167cfb4c 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -380,23 +380,31 @@ xfs_attr3_root_inactive( return error; } +/* + * xfs_attr_inactive kills all traces of an attribute fork on an inode. It + * removes both the on-disk and in-memory inode fork. Note that this also has to + * handle the condition of inodes without attributes but with an attribute fork + * configured, so we can't use xfs_inode_hasattr() here. + * + * The in-memory attribute fork is removed even on error. + */ int -xfs_attr_inactive(xfs_inode_t *dp) +xfs_attr_inactive( + struct xfs_inode *dp) { - xfs_trans_t *trans; - xfs_mount_t *mp; - int error; + struct xfs_trans *trans; + struct xfs_mount *mp; + int cancel_flags = 0; + int lock_mode = XFS_ILOCK_SHARED; + int error = 0; mp = dp->i_mount; ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); - xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!xfs_inode_hasattr(dp) || - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return 0; - } - xfs_iunlock(dp, XFS_ILOCK_SHARED); + xfs_ilock(dp, lock_mode); + if (!XFS_IFORK_Q(dp)) + goto out_destroy_fork; + xfs_iunlock(dp, lock_mode); /* * Start our first transaction of the day. @@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp) * the inode in every transaction to let it float upward through * the log. */ + lock_mode = 0; trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); - if (error) { - xfs_trans_cancel(trans, 0); - return error; - } - xfs_ilock(dp, XFS_ILOCK_EXCL); + if (error) + goto out_cancel; + + lock_mode = XFS_ILOCK_EXCL; + cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT; + xfs_ilock(dp, lock_mode); + + if (!XFS_IFORK_Q(dp)) + goto out_cancel; /* * No need to make quota reservations here. We expect to release some @@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp) */ xfs_trans_ijoin(trans, dp, 0); - /* - * Decide on what work routines to call based on the inode size. - */ - if (!xfs_inode_hasattr(dp) || - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - error = 0; - goto out; + /* invalidate and truncate the attribute fork extents */ + if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { + error = xfs_attr3_root_inactive(&trans, dp); + if (error) + goto out_cancel; + + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); + if (error) + goto out_cancel; } - error = xfs_attr3_root_inactive(&trans, dp); - if (error) - goto out; - error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); - if (error) - goto out; + /* Reset the attribute fork - this also destroys the in-core fork */ + xfs_attr_fork_remove(dp, trans); error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - + xfs_iunlock(dp, lock_mode); return error; -out: - xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - xfs_iunlock(dp, XFS_ILOCK_EXCL); +out_cancel: + xfs_trans_cancel(trans, cancel_flags); +out_destroy_fork: + /* kill the in-core attr fork before we drop the inode lock */ + if (dp->i_afp) + xfs_idestroy_fork(dp, XFS_ATTR_FORK); + if (lock_mode) + xfs_iunlock(dp, lock_mode); return error; } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8121e75352ee..3b7591224f4a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -124,7 +124,7 @@ xfs_iozero( status = 0; } while (count); - return (-status); + return status; } int diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d6ebc85192b7..539a85fddbc2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1946,21 +1946,17 @@ xfs_inactive( /* * If there are attributes associated with the file then blow them away * now. The code calls a routine that recursively deconstructs the - * attribute fork. We need to just commit the current transaction - * because we can't use it for xfs_attr_inactive(). + * attribute fork. If also blows away the in-core attribute fork. */ - if (ip->i_d.di_anextents > 0) { - ASSERT(ip->i_d.di_forkoff != 0); - + if (XFS_IFORK_Q(ip)) { error = xfs_attr_inactive(ip); if (error) return; } - if (ip->i_afp) - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - + ASSERT(!ip->i_afp); ASSERT(ip->i_d.di_anextents == 0); + ASSERT(ip->i_d.di_forkoff == 0); /* * Free the inode. @@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout( if (error) return error; - /* Satisfy xfs_bumplink that this is a real tmpfile */ + /* + * Prepare the tmpfile inode as if it were created through the VFS. + * Otherwise, the link increment paths will complain about nlink 0->1. + * Drop the link count as done by d_tmpfile(), complete the inode setup + * and flag it as linkable. + */ + drop_nlink(VFS_I(tmpfile)); xfs_finish_inode_setup(tmpfile); VFS_I(tmpfile)->i_state |= I_LINKABLE; @@ -3151,7 +3153,7 @@ xfs_rename( * intermediate state on disk. */ if (wip) { - ASSERT(wip->i_d.di_nlink == 0); + ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0); error = xfs_bumplink(tp, wip); if (error) goto out_trans_abort; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2ce7ee3b4ec1..6f23fbdfb365 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp) return xfs_sync_sb(mp, true); } +/* + * Deltas for the inode count are +/-64, hence we use a large batch size + * of 128 so we don't need to take the counter lock on every update. + */ +#define XFS_ICOUNT_BATCH 128 int xfs_mod_icount( struct xfs_mount *mp, int64_t delta) { - /* deltas are +/-64, hence the large batch size of 128. */ - __percpu_counter_add(&mp->m_icount, delta, 128); - if (percpu_counter_compare(&mp->m_icount, 0) < 0) { + __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH); + if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { ASSERT(0); percpu_counter_add(&mp->m_icount, -delta); return -EINVAL; @@ -1113,6 +1117,14 @@ xfs_mod_ifree( return 0; } +/* + * Deltas for the block count can vary from 1 to very large, but lock contention + * only occurs on frequent small block count updates such as in the delayed + * allocation path for buffered writes (page a time updates). Hence we set + * a large batch count (1024) to minimise global counter updates except when + * we get near to ENOSPC and we have to be very accurate with our updates. + */ +#define XFS_FDBLOCKS_BATCH 1024 int xfs_mod_fdblocks( struct xfs_mount *mp, @@ -1151,25 +1163,19 @@ xfs_mod_fdblocks( * Taking blocks away, need to be more accurate the closer we * are to zero. * - * batch size is set to a maximum of 1024 blocks - if we are - * allocating of freeing extents larger than this then we aren't - * going to be hammering the counter lock so a lock per update - * is not a problem. - * * If the counter has a value of less than 2 * max batch size, * then make everything serialise as we are real close to * ENOSPC. */ -#define __BATCH 1024 - if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0) + if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH, + XFS_FDBLOCKS_BATCH) < 0) batch = 1; else - batch = __BATCH; -#undef __BATCH + batch = XFS_FDBLOCKS_BATCH; __percpu_counter_add(&mp->m_fdblocks, delta, batch); - if (percpu_counter_compare(&mp->m_fdblocks, - XFS_ALLOC_SET_ASIDE(mp)) >= 0) { + if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), + XFS_FDBLOCKS_BATCH) >= 0) { /* we had space! */ return 0; } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ae2982c0f7a6..656da2a12ffe 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,7 +17,7 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 -#define PHY_ID_BCM7425 0x03625e60 +#define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 27e285b92b5f..59915ea5373c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, return 1; } -static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +static inline unsigned int cpumask_local_spread(unsigned int i, int node) { - set_bit(0, cpumask_bits(dstp)); - return 0; } @@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); +unsigned int cpumask_local_spread(unsigned int i, int node); /** * for_each_cpu - iterate over every cpu in a mask diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 30624954dec5..84737565c1fd 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -227,6 +227,7 @@ extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); -extern int arch_setup_dmar_msi(unsigned int irq); +extern int dmar_alloc_hwirq(int id, int node, void *arg); +extern void dmar_free_hwirq(int irq); #endif /* __DMAR_H__ */ diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 70a1dbbf2093..d4a527e58434 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -1,24 +1,38 @@ #ifndef LINUX_HTIRQ_H #define LINUX_HTIRQ_H +struct pci_dev; +struct irq_data; + struct ht_irq_msg { u32 address_lo; /* low 32 bits of the ht irq message */ u32 address_hi; /* high 32 bits of the it irq message */ }; +typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg); + +struct ht_irq_cfg { + struct pci_dev *dev; + /* Update callback used to cope with buggy hardware */ + ht_irq_update_t *update; + unsigned pos; + unsigned idx; + struct ht_irq_msg msg; +}; + /* Helper functions.. */ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -struct irq_data; void mask_ht_irq(struct irq_data *data); void unmask_ht_irq(struct irq_data *data); /* The arch hook for getting things started */ -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); +int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, + ht_irq_update_t *update); +void arch_teardown_ht_irq(unsigned int irq); /* For drivers of buggy hardware */ -typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg); int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); #endif /* LINUX_HTIRQ_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 796ef9645827..0af9b03e2b1c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -298,6 +298,8 @@ struct q_inval { #define INTR_REMAP_TABLE_ENTRIES 65536 +struct irq_domain; + struct ir_table { struct irte *base; unsigned long *bitmap; @@ -347,6 +349,8 @@ struct intel_iommu { #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ + struct irq_domain *ir_domain; + struct irq_domain *ir_msi_domain; #endif struct device *iommu_dev; /* IOMMU-sysfs device */ int node; diff --git a/include/linux/irq.h b/include/linux/irq.h index 62c6901cab55..48cb7d1aa58f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -327,6 +327,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_write_msi_msg: optional to write message content for MSI * @irq_get_irqchip_state: return the internal state of an interrupt * @irq_set_irqchip_state: set the internal state of a interrupt + * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine * @flags: chip specific flags */ struct irq_chip { @@ -369,6 +370,8 @@ struct irq_chip { int (*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state); int (*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state); + int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); + unsigned long flags; }; @@ -422,6 +425,7 @@ extern void irq_cpu_online(void); extern void irq_cpu_offline(void); extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); +extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); @@ -467,6 +471,8 @@ extern int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force); extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); +extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, + void *vcpu_info); #endif /* Handling of unhandled and spurious interrupts: */ diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 50e50095c8d1..84a109449610 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs); +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); + +static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); +} static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) return 0; } +static inline int +__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) +{ + return percpu_counter_compare(fbc, rhs); +} + static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 497bc14cdb85..0320bbb7d7b5 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -98,7 +98,8 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state:7, + __u8 icsk_ca_state:6, + icsk_ca_setsockopt:1, icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8e3668b44c29..fc57f6b82fc5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -354,7 +354,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +388,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -401,9 +401,10 @@ struct ieee80211_mlme_event { /** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type. + * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @u: union holding the above two fields */ struct ieee80211_event { enum ieee80211_event_type type; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index c56a438c3a1e..ce13cf20f625 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -574,11 +574,14 @@ static inline void sctp_v6_map_v4(union sctp_addr *addr) /* Map v4 address to v4-mapped v6 address */ static inline void sctp_v4_map_v6(union sctp_addr *addr) { + __be16 port; + + port = addr->v4.sin_port; + addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; + addr->v6.sin6_port = port; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_scope_id = 0; - addr->v6.sin6_port = addr->v4.sin_port; - addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; addr->v6.sin6_addr.s6_addr32[0] = 0; addr->v6.sin6_addr.s6_addr32[1] = 0; addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index d61be7297b2c..5f1225706993 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -1,9 +1,7 @@ #ifndef TARGET_CORE_BACKEND_H #define TARGET_CORE_BACKEND_H -#define TRANSPORT_PLUGIN_PHBA_PDEV 1 -#define TRANSPORT_PLUGIN_VHBA_PDEV 2 -#define TRANSPORT_PLUGIN_VHBA_VDEV 3 +#define TRANSPORT_FLAG_PASSTHROUGH 1 struct target_backend_cits { struct config_item_type tb_dev_cit; @@ -22,7 +20,7 @@ struct se_subsystem_api { char inquiry_rev[4]; struct module *owner; - u8 transport_type; + u8 transport_flags; int (*attach_hba)(struct se_hba *, u32); void (*detach_hba)(struct se_hba *); @@ -138,5 +136,7 @@ int se_dev_set_queue_depth(struct se_device *, u32); int se_dev_set_max_sectors(struct se_device *, u32); int se_dev_set_optimal_sectors(struct se_device *, u32); int se_dev_set_block_size(struct se_device *, u32); +sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); #endif /* TARGET_CORE_BACKEND_H */ diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 25bb04c4209e..b99c01170392 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -40,8 +40,6 @@ struct target_fabric_configfs { struct config_item *tf_fabric; /* Passed from fabric modules */ struct config_item_type *tf_fabric_cit; - /* Pointer to target core subsystem */ - struct configfs_subsystem *tf_subsys; /* Pointer to fabric's struct module */ struct module *tf_module; struct target_core_fabric_ops tf_ops; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 17c7f5ac7ea0..0f4dc3768587 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -4,7 +4,6 @@ struct target_core_fabric_ops { struct module *module; const char *name; - struct configfs_subsystem *tf_subsys; char *(*get_fabric_name)(void); u8 (*get_fabric_proto_ident)(struct se_portal_group *); char *(*tpg_get_wwn)(struct se_portal_group *); @@ -109,6 +108,9 @@ struct target_core_fabric_ops { int target_register_template(const struct target_core_fabric_ops *fo); void target_unregister_template(const struct target_core_fabric_ops *fo); +int target_depend_item(struct config_item *item); +void target_undepend_item(struct config_item *item); + struct se_session *transport_init_session(enum target_prot_op); int transport_alloc_session_tags(struct se_session *, unsigned int, unsigned int); diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 81ea59812117..f7554fd7fc62 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -140,19 +140,42 @@ DEFINE_EVENT(kmem_free, kfree, TP_ARGS(call_site, ptr) ); -DEFINE_EVENT(kmem_free, kmem_cache_free, +DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr) + TP_ARGS(call_site, ptr), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())) ); -TRACE_EVENT(mm_page_free, +TRACE_EVENT_CONDITION(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) @@ -253,12 +276,35 @@ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_ARGS(page, order, migratetype) ); -DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, +TRACE_EVENT_CONDITION(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + + TP_STRUCT__entry( + __field( unsigned long, pfn ) + __field( unsigned int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->pfn = page ? page_to_pfn(page) : -1UL; + __entry->order = order; + __entry->migratetype = migratetype; + ), + TP_printk("page=%p pfn=%lu order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 984169a819ee..d7f1cbc3766c 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -26,6 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include <linux/types.h> +#include <linux/virtio_types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index eb9a4ea394ab..55016b2151f3 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -950,6 +950,20 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data) } /** + * irq_chip_set_vcpu_affinity_parent - Set vcpu affinity on the parent interrupt + * @data: Pointer to interrupt specific data + * @dest: The vcpu affinity information + */ +int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) +{ + data = data->parent_data; + if (data->chip->irq_set_vcpu_affinity) + return data->chip->irq_set_vcpu_affinity(data, vcpu_info); + + return -ENOSYS; +} + +/** * irq_chip_set_wake_parent - Set/reset wake-up on the parent interrupt * @data: Pointer to interrupt specific data * @on: Whether to set or reset the wake-up capability of this irq diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index e68932bb308e..b1c7e8f46bfb 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -256,6 +256,37 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) } EXPORT_SYMBOL_GPL(irq_set_affinity_hint); +/** + * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt + * @irq: interrupt number to set affinity + * @vcpu_info: vCPU specific data + * + * This function uses the vCPU specific data to set the vCPU + * affinity for an irq. The vCPU specific data is passed from + * outside, such as KVM. One example code path is as below: + * KVM -> IOMMU -> irq_set_vcpu_affinity(). + */ +int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + struct irq_data *data; + struct irq_chip *chip; + int ret = -ENOSYS; + + if (!desc) + return -EINVAL; + + data = irq_desc_get_irq_data(desc); + chip = irq_data_get_irq_chip(data); + if (chip && chip->irq_set_vcpu_affinity) + ret = chip->irq_set_vcpu_affinity(data, vcpu_info); + irq_put_desc_unlock(desc, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity); + static void irq_affinity_notify(struct work_struct *work) { struct irq_affinity_notify *notify = diff --git a/kernel/module.c b/kernel/module.c index 42a1d2afb217..cfc9e843a924 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3370,6 +3370,9 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + /* we can't deallocate the module until we clear memory protection */ unset_module_init_ro_nx(mod); unset_module_core_ro_nx(mod); diff --git a/lib/cpumask.c b/lib/cpumask.c index 830dd5dec40f..5f627084f2e9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * + * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy + * @node: local numa_node * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. + * This function selects an online CPU according to a numa aware policy; + * local cpus are returned first, followed by non-local ones, then it + * wraps around. * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. + * It's not very efficient, but useful for setup. */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +unsigned int cpumask_local_spread(unsigned int i, int node) { - cpumask_var_t mask; int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (numa_node == -1 || !cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); + if (node == -1) { + for_each_cpu(cpu, cpu_online_mask) + if (i-- == 0) + return cpu; } else { - int n; - - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); - - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); + /* NUMA first. */ + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) + if (i-- == 0) + return cpu; + + for_each_cpu(cpu, cpu_online_mask) { + /* Skip NUMA nodes, done above. */ + if (cpumask_test_cpu(cpu, cpumask_of_node(node))) + continue; + + if (i-- == 0) + return cpu; } } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; + BUG(); } -EXPORT_SYMBOL(cpumask_set_cpu_local_first); +EXPORT_SYMBOL(cpumask_local_spread); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 48144cdae819..f051d69f0910 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ - if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else @@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) else return 0; } -EXPORT_SYMBOL(percpu_counter_compare); +EXPORT_SYMBOL(__percpu_counter_compare); static int __init percpu_counter_startup(void) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a3abe6ed111e..22fd0419b314 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1822,7 +1822,7 @@ static void br_multicast_query_expired(struct net_bridge *br, if (query->startup_sent < br->multicast_startup_query_count) query->startup_sent++; - RCU_INIT_POINTER(querier, NULL); + RCU_INIT_POINTER(querier->port, NULL); br_multicast_send_query(br, NULL, query); spin_unlock(&br->multicast_lock); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 24c7c96bf5f8..91180a7fc943 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1117,8 +1117,6 @@ static int do_replace(struct net *net, const void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; - if (tmp.num_counters == 0) - return -EINVAL; tmp.name[sizeof(tmp.name) - 1] = 0; @@ -2161,8 +2159,6 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; - if (tmp.num_counters == 0) - return -EINVAL; memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4ec0c803aef1..112ad784838a 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -330,6 +330,10 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + + if (sock_flag(sk, SOCK_DEAD)) + break; + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } @@ -373,6 +377,10 @@ static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb; lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + err = -ECONNRESET; + goto unlock; + } skb = skb_dequeue(&sk->sk_receive_queue); caif_check_flow_release(sk); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1347e11f5cc9..1d00b8922902 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -359,15 +359,7 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) int err; struct ethtool_cmd cmd; - if (!dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - - cmd.cmd = ETHTOOL_GSET; - - err = dev->ethtool_ops->get_settings(dev, &cmd); + err = __ethtool_get_settings(dev, &cmd); if (err < 0) return err; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e6f6cc3a1bcf..392e29a0227d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -359,7 +359,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, */ ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); if (ds == NULL) - return NULL; + return ERR_PTR(-ENOMEM); ds->dst = dst; ds->index = index; @@ -370,7 +370,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ret = dsa_switch_setup_one(ds, parent); if (ret) - return NULL; + return ERR_PTR(ret); return ds; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 421a80b09b62..30b544f025ac 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -256,7 +256,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output.low); + XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9f7269f3c54a..0c152087ca15 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -65,7 +65,6 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, goto drop; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; - skb->mark = be32_to_cpu(tunnel->parms.i_key); return xfrm_input(skb, nexthdr, spi, encap_type); } @@ -91,6 +90,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct pcpu_sw_netstats *tstats; struct xfrm_state *x; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; + u32 orig_mark = skb->mark; + int ret; if (!tunnel) return 1; @@ -107,7 +108,11 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); family = x->inner_mode->afinfo->family; - if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + skb->mark = be32_to_cpu(tunnel->parms.i_key); + ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); + skb->mark = orig_mark; + + if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev))); @@ -216,8 +221,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl, 0, sizeof(fl)); - skb->mark = be32_to_cpu(tunnel->parms.o_key); - switch (skb->protocol) { case htons(ETH_P_IP): xfrm_decode_session(skb, &fl, AF_INET); @@ -233,6 +236,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + /* override mark with tunnel output key */ + fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key); + return vti_xmit(skb, dev, &fl); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7a5ae50c80c8..84be008c945c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -187,6 +187,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; + icsk->icsk_ca_setsockopt = 1; if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); @@ -335,8 +336,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) rcu_read_lock(); ca = __tcp_ca_find_autoload(name); /* No change asking for existing value */ - if (ca == icsk->icsk_ca_ops) + if (ca == icsk->icsk_ca_ops) { + icsk->icsk_ca_setsockopt = 1; goto out; + } if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b5732a54f2ad..17e7339ee5ca 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -420,7 +420,10 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) rcu_read_unlock(); } - if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + /* If no valid choice made yet, assign current system default ca. */ + if (!ca_got_dst && + (!icsk->icsk_ca_setsockopt || + !try_module_get(icsk->icsk_ca_ops->owner))) tcp_assign_congestion_control(sk); tcp_set_ca_state(sk, TCP_CA_Open); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d10b7e0112eb..1c92ea67baef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1345,10 +1345,8 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (noblock) - return -EAGAIN; - - /* starting over for a new packet */ + /* starting over for a new packet, but check if we need to yield */ + cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 31f1b5d5e2ef..7c07ce36aae2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -248,7 +248,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output.low); + XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ed9d681207fa..0224c032dca5 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -322,7 +322,6 @@ static int vti6_rcv(struct sk_buff *skb) } XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; - skb->mark = be32_to_cpu(t->parms.i_key); rcu_read_unlock(); @@ -342,6 +341,8 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct pcpu_sw_netstats *tstats; struct xfrm_state *x; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; + u32 orig_mark = skb->mark; + int ret; if (!t) return 1; @@ -358,7 +359,11 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); family = x->inner_mode->afinfo->family; - if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + skb->mark = be32_to_cpu(t->parms.i_key); + ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); + skb->mark = orig_mark; + + if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); @@ -430,6 +435,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device *tdev; struct xfrm_state *x; int err = -1; + int mtu; if (!dst) goto tx_err_link_failure; @@ -463,6 +469,19 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; + mtu = dst_mtu(dst); + if (!skb->ignore_df && skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + + return -EMSGSIZE; + } + err = dst_output(skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); @@ -495,7 +514,6 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) int ret; memset(&fl, 0, sizeof(fl)); - skb->mark = be32_to_cpu(t->parms.o_key); switch (skb->protocol) { case htons(ETH_P_IPV6): @@ -516,6 +534,9 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; } + /* override mark with tunnel output key */ + fl.flowi_mark = be32_to_cpu(t->parms.o_key); + ret = vti6_xmit(skb, dev, &fl); if (ret < 0) goto tx_err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c2ec41617a35..e51fc3eee6db 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -525,10 +525,8 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (noblock) - return -EAGAIN; - - /* starting over for a new packet */ + /* starting over for a new packet, but check if we need to yield */ + cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 265e42721a66..ff347a0eebd4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2495,51 +2495,22 @@ static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, struct ieee80211_roc_work *new_roc, struct ieee80211_roc_work *cur_roc) { - unsigned long j = jiffies; - unsigned long cur_roc_end = cur_roc->hw_start_time + - msecs_to_jiffies(cur_roc->duration); - struct ieee80211_roc_work *next_roc; - int new_dur; + unsigned long now = jiffies; + unsigned long remaining = cur_roc->hw_start_time + + msecs_to_jiffies(cur_roc->duration) - + now; if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) return false; - if (time_after(j + IEEE80211_ROC_MIN_LEFT, cur_roc_end)) + /* if it doesn't fit entirely, schedule a new one */ + if (new_roc->duration > jiffies_to_msecs(remaining)) return false; ieee80211_handle_roc_started(new_roc); - new_dur = new_roc->duration - jiffies_to_msecs(cur_roc_end - j); - - /* cur_roc is long enough - add new_roc to the dependents list. */ - if (new_dur <= 0) { - list_add_tail(&new_roc->list, &cur_roc->dependents); - return true; - } - - new_roc->duration = new_dur; - - /* - * if cur_roc was already coalesced before, we might - * want to extend the next roc instead of adding - * a new one. - */ - next_roc = list_entry(cur_roc->list.next, - struct ieee80211_roc_work, list); - if (&next_roc->list != &local->roc_list && - next_roc->chan == new_roc->chan && - next_roc->sdata == new_roc->sdata && - !WARN_ON(next_roc->started)) { - list_add_tail(&new_roc->list, &next_roc->dependents); - next_roc->duration = max(next_roc->duration, - new_roc->duration); - next_roc->type = max(next_roc->type, new_roc->type); - return true; - } - - /* add right after cur_roc */ - list_add(&new_roc->list, &cur_roc->list); - + /* add to dependents so we send the expired event properly */ + list_add_tail(&new_roc->list, &cur_roc->dependents); return true; } @@ -2652,17 +2623,9 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, * In the offloaded ROC case, if it hasn't begun, add * this new one to the dependent list to be handled * when the master one begins. If it has begun, - * check that there's still a minimum time left and - * if so, start this one, transmitting the frame, but - * add it to the list directly after this one with - * a reduced time so we'll ask the driver to execute - * it right after finishing the previous one, in the - * hope that it'll also be executed right afterwards, - * effectively extending the old one. - * If there's no minimum time left, just add it to the - * normal list. - * TODO: the ROC type is ignored here, assuming that it - * is better to immediately use the current ROC. + * check if it fits entirely within the existing one, + * in which case it will just be dependent as well. + * Otherwise, schedule it by itself. */ if (!tmp->hw_begun) { list_add_tail(&roc->list, &tmp->dependents); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ab46ab4a7249..c0a9187bc3a9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -205,6 +205,8 @@ enum ieee80211_packet_rx_flags { * @IEEE80211_RX_CMNTR: received on cooked monitor already * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported * to cfg80211_report_obss_beacon(). + * @IEEE80211_RX_REORDER_TIMER: this frame is released by the + * reorder buffer timeout timer, not the normal RX path * * These flags are used across handling multiple interfaces * for a single frame. @@ -212,6 +214,7 @@ enum ieee80211_packet_rx_flags { enum ieee80211_rx_flags { IEEE80211_RX_CMNTR = BIT(0), IEEE80211_RX_BEACON_REPORTED = BIT(1), + IEEE80211_RX_REORDER_TIMER = BIT(2), }; struct ieee80211_rx_data { @@ -325,12 +328,6 @@ struct mesh_preq_queue { u8 flags; }; -#if HZ/100 == 0 -#define IEEE80211_ROC_MIN_LEFT 1 -#else -#define IEEE80211_ROC_MIN_LEFT (HZ/100) -#endif - struct ieee80211_roc_work { struct list_head list; struct list_head dependents; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bab5c63c0bad..84cef600c573 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -522,6 +522,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) memcpy(sdata->vif.hw_queue, master->vif.hw_queue, sizeof(sdata->vif.hw_queue)); sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef; + + mutex_lock(&local->key_mtx); + sdata->crypto_tx_tailroom_needed_cnt += + master->crypto_tx_tailroom_needed_cnt; + mutex_unlock(&local->key_mtx); + break; } case NL80211_IFTYPE_AP: diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 2291cd730091..a907f2d5c12d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -58,6 +58,22 @@ static void assert_key_lock(struct ieee80211_local *local) lockdep_assert_held(&local->key_mtx); } +static void +update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) +{ + struct ieee80211_sub_if_data *vlan; + + if (sdata->vif.type != NL80211_IFTYPE_AP) + return; + + mutex_lock(&sdata->local->mtx); + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + vlan->crypto_tx_tailroom_needed_cnt += delta; + + mutex_unlock(&sdata->local->mtx); +} + static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) { /* @@ -79,6 +95,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ + update_vlan_tailroom_need_count(sdata, 1); + if (!sdata->crypto_tx_tailroom_needed_cnt++) { /* * Flush all XMIT packets currently using HW encryption or no @@ -88,6 +106,15 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) } } +static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, + int delta) +{ + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); + + update_vlan_tailroom_need_count(sdata, -delta); + sdata->crypto_tx_tailroom_needed_cnt -= delta; +} + static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; @@ -144,7 +171,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) - sdata->crypto_tx_tailroom_needed_cnt--; + decrease_tailroom_need_count(sdata, 1); WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)); @@ -541,7 +568,7 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key, schedule_delayed_work(&sdata->dec_tailroom_needed_wk, HZ/2); } else { - sdata->crypto_tx_tailroom_needed_cnt--; + decrease_tailroom_need_count(sdata, 1); } } @@ -631,6 +658,7 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; + struct ieee80211_sub_if_data *vlan; ASSERT_RTNL(); @@ -639,7 +667,14 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) mutex_lock(&sdata->local->key_mtx); - sdata->crypto_tx_tailroom_needed_cnt = 0; + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || + sdata->crypto_tx_tailroom_pending_dec); + + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || + vlan->crypto_tx_tailroom_pending_dec); + } list_for_each_entry(key, &sdata->key_list, list) { increment_tailroom_need_count(sdata); @@ -649,6 +684,22 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->local->key_mtx); } +void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *vlan; + + mutex_lock(&sdata->local->key_mtx); + + sdata->crypto_tx_tailroom_needed_cnt = 0; + + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + vlan->crypto_tx_tailroom_needed_cnt = 0; + } + + mutex_unlock(&sdata->local->key_mtx); +} + void ieee80211_iter_keys(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void (*iter)(struct ieee80211_hw *hw, @@ -688,8 +739,8 @@ static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, { struct ieee80211_key *key, *tmp; - sdata->crypto_tx_tailroom_needed_cnt -= - sdata->crypto_tx_tailroom_pending_dec; + decrease_tailroom_need_count(sdata, + sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; ieee80211_debugfs_key_remove_mgmt_default(sdata); @@ -709,6 +760,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *vlan; + struct ieee80211_sub_if_data *master; struct ieee80211_key *key, *tmp; LIST_HEAD(keys); @@ -728,8 +780,20 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, list_for_each_entry_safe(key, tmp, &keys, list) __ieee80211_key_destroy(key, false); - WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || - sdata->crypto_tx_tailroom_pending_dec); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + if (sdata->bss) { + master = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt != + master->crypto_tx_tailroom_needed_cnt); + } + } else { + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || + sdata->crypto_tx_tailroom_pending_dec); + } + if (sdata->vif.type == NL80211_IFTYPE_AP) { list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || @@ -793,8 +857,8 @@ void ieee80211_delayed_tailroom_dec(struct work_struct *wk) */ mutex_lock(&sdata->local->key_mtx); - sdata->crypto_tx_tailroom_needed_cnt -= - sdata->crypto_tx_tailroom_pending_dec; + decrease_tailroom_need_count(sdata, + sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index c5a31835be0e..96557dd1e77d 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -161,6 +161,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 260eed45b6d2..5793f75c5ffd 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2121,7 +2121,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - if (rx->local->napi) + if (!(rx->flags & IEEE80211_RX_REORDER_TIMER) && + rx->local->napi) napi_gro_receive(rx->local->napi, skb); else netif_receive_skb(skb); @@ -3231,7 +3232,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .flags = 0, + .flags = IEEE80211_RX_REORDER_TIMER, }; struct tid_ampdu_rx *tid_agg_rx; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 79412f16b61d..b864ebc6ab8f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2023,6 +2023,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) + ieee80211_reset_crypto_tx_tailroom(sdata); + + list_for_each_entry(sdata, &local->interfaces, list) if (ieee80211_sdata_running(sdata)) ieee80211_enable_keys(sdata); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ad9eed70bc8f..1e1c89e51a11 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -815,10 +815,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (dev->flags & IFF_UP) dev_deactivate(dev); - if (new && new->ops->attach) { - new->ops->attach(new); - num_q = 0; - } + if (new && new->ops->attach) + goto skip; for (i = 0; i < num_q; i++) { struct netdev_queue *dev_queue = dev_ingress_queue(dev); @@ -834,12 +832,16 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_destroy(old); } +skip: if (!ingress) { notify_and_destroy(net, skb, n, classid, dev->qdisc, new); if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; + + if (new && new->ops->attach) + new->ops->attach(new); } else { notify_and_destroy(net, skb, n, classid, old, new); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5266ea7b922b..06430598cf51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1880,6 +1880,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, unix_state_unlock(sk); timeo = freezable_schedule_timeout(timeo); unix_state_lock(sk); + + if (sock_flag(sk, SOCK_DEAD)) + break; + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } @@ -1939,6 +1943,10 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb, *last; unix_state_lock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + err = -ECONNRESET; + goto unlock; + } last = skb = skb_peek(&sk->sk_receive_queue); again: if (skb == NULL) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 526c4feb3b50..b58286ecd156 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -13,6 +13,8 @@ #include <net/dst.h> #include <net/ip.h> #include <net/xfrm.h> +#include <net/ip_tunnels.h> +#include <net/ip6_tunnel.h> static struct kmem_cache *secpath_cachep __read_mostly; @@ -186,6 +188,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct xfrm_state *x = NULL; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; + u32 mark = skb->mark; unsigned int family; int decaps = 0; int async = 0; @@ -203,6 +206,18 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; + /* if tunnel is present override skb->mark value with tunnel i_key */ + if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) { + switch (family) { + case AF_INET: + mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key); + break; + case AF_INET6: + mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key); + break; + } + } + /* Allocate new secpath or COW existing one. */ if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { struct sec_path *sp; @@ -229,7 +244,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family); if (x == NULL) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index dab57daae408..4fd725a0c500 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -99,6 +99,7 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; + XFRM_SKB_CB(skb)->seq.output.hi = 0; if (unlikely(x->replay.oseq == 0)) { x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); @@ -177,6 +178,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + XFRM_SKB_CB(skb)->seq.output.hi = 0; if (unlikely(replay_esn->oseq == 0)) { replay_esn->oseq--; xfrm_audit_state_replay_overflow(x, skb); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f5e39e35d73a..96688cd0f6f1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -927,8 +927,8 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, x->id.spi != spi) continue; - spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_hold(x); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } spin_unlock_bh(&net->xfrm.xfrm_state_lock); diff --git a/scripts/gdb/linux/modules.py b/scripts/gdb/linux/modules.py index a1504c4f1900..25db8cff44a2 100644 --- a/scripts/gdb/linux/modules.py +++ b/scripts/gdb/linux/modules.py @@ -73,18 +73,11 @@ class LxLsmod(gdb.Command): " " if utils.get_long_type().sizeof == 8 else "")) for module in module_list(): - ref = 0 - module_refptr = module['refptr'] - for cpu in cpus.cpu_list("cpu_possible_mask"): - refptr = cpus.per_cpu(module_refptr, cpu) - ref += refptr['incs'] - ref -= refptr['decs'] - gdb.write("{address} {name:<19} {size:>8} {ref}".format( address=str(module['module_core']).split()[0], name=module['name'].string(), size=str(module['core_size']), - ref=str(ref))) + ref=str(module['refcnt']['counter']))) source_list = module['source_list'] t = self._module_use_type.get_type().pointer() diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 1c8678775f40..ac0db1679f09 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4926,9 +4926,12 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec, dig_only: parse_digital(codec); - if (spec->power_down_unused || codec->power_save_node) + if (spec->power_down_unused || codec->power_save_node) { if (!codec->power_filter) codec->power_filter = snd_hda_gen_path_power_filter; + if (!codec->patch_ops.stream_pm) + codec->patch_ops.stream_pm = snd_hda_gen_stream_pm; + } if (!spec->no_analog && spec->beep_nid) { err = snd_hda_attach_beep_device(codec, spec->beep_nid); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 34040d26c94f..fea198c58196 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2089,6 +2089,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaab0), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaac8), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, /* VIA VT8251/VT8237A */ { PCI_DEVICE(0x1106, 0x3288), .driver_data = AZX_DRIVER_VIA | AZX_DCAPS_POSFIX_VIA }, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 31f8f13be907..464168426465 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -884,6 +884,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = { { 0x10ec0275, 0x1028, 0, "ALC3260" }, { 0x10ec0899, 0x1028, 0, "ALC3861" }, { 0x10ec0298, 0x1028, 0, "ALC3266" }, + { 0x10ec0256, 0x1028, 0, "ALC3246" }, { 0x10ec0670, 0x1025, 0, "ALC669X" }, { 0x10ec0676, 0x1025, 0, "ALC679X" }, { 0x10ec0282, 0x1043, 0, "ALC3229" }, @@ -4227,6 +4228,11 @@ static void alc_fixup_headset_mode_alc662(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; spec->gen.hp_mic = 1; /* Mic-in is same pin as headphone */ + + /* Disable boost for mic-in permanently. (This code is only called + from quirks that guarantee that the headphone is at NID 0x1b.) */ + snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000); + snd_hda_override_wcaps(codec, 0x1b, get_wcaps(codec, 0x1b) & ~AC_WCAP_IN_AMP); } else alc_fixup_headset_mode(codec, fix, action); } diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 43c99ce4a520..6833c74ed6ff 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4403,7 +4403,6 @@ static const struct hda_codec_ops stac_patch_ops = { #ifdef CONFIG_PM .suspend = stac_suspend, #endif - .stream_pm = snd_hda_gen_stream_pm, .reboot_notify = stac_shutup, }; @@ -4697,7 +4696,8 @@ static int patch_stac92hd71bxx(struct hda_codec *codec) return err; spec = codec->spec; - codec->power_save_node = 1; + /* disabled power_save_node since it causes noises on a Dell machine */ + /* codec->power_save_node = 1; */ spec->linear_tone_beep = 0; spec->gen.own_eapd_ctl = 1; spec->gen.power_down_unused = 1; diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index d51703e30523..0a4ad5feb82e 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -72,7 +72,6 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec, if (led_set_func(TPACPI_LED_MUTE, false) >= 0) { old_vmaster_hook = spec->vmaster_mute.hook; spec->vmaster_mute.hook = update_tpacpi_mute_led; - spec->vmaster_mute_enum = 1; removefunc = false; } if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0) { diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 46facfc9aec1..29175346cc4f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1118,6 +1118,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */ case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ + case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ return true; } diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index c5baf9c591b7..618c2bcd4eab 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -123,6 +123,8 @@ static int get_last_jit_image(char *haystack, size_t hlen, assert(ret == 0); ptr = haystack; + memset(pmatch, 0, sizeof(pmatch)); + while (1) { ret = regexec(®ex, ptr, 1, pmatch, 0); if (ret == 0) { diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bac98ca3d4ca..323b65edfc97 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,6 +52,7 @@ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; +unsigned int do_knl_cstates; unsigned int do_pc2; unsigned int do_pc3; unsigned int do_pc6; @@ -91,6 +92,7 @@ unsigned int do_gfx_perf_limit_reasons; unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; +int base_cpu; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -316,7 +318,7 @@ void print_header(void) if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c1"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, " CPU%%c3"); if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c6"); @@ -546,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); @@ -1018,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates && !do_slm_cstates) { + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (do_nhm_cstates) { + if (do_nhm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; + } else if (do_knl_cstates) { + if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) + return -7; } if (do_snb_cstates) @@ -1150,7 +1155,7 @@ dump_nhm_platform_info(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); @@ -1162,7 +1167,7 @@ dump_nhm_platform_info(void) fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", ratio, bclk, ratio * bclk); - get_msr(0, MSR_IA32_POWER_CTL, &msr); + get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", msr, msr & 0x2 ? "EN" : "DIS"); @@ -1175,7 +1180,7 @@ dump_hsw_turbo_ratio_limits(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); @@ -1197,7 +1202,7 @@ dump_ivt_turbo_ratio_limits(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); @@ -1249,7 +1254,7 @@ dump_nhm_turbo_ratio_limits(void) unsigned long long msr; unsigned int ratio; - get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); @@ -1296,11 +1301,72 @@ dump_nhm_turbo_ratio_limits(void) } static void +dump_knl_turbo_ratio_limits(void) +{ + int cores; + unsigned int ratio; + unsigned long long msr; + int delta_cores; + int delta_ratio; + int i; + + get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", + msr); + + /** + * Turbo encoding in KNL is as follows: + * [7:0] -- Base value of number of active cores of bucket 1. + * [15:8] -- Base value of freq ratio of bucket 1. + * [20:16] -- +ve delta of number of active cores of bucket 2. + * i.e. active cores of bucket 2 = + * active cores of bucket 1 + delta + * [23:21] -- Negative delta of freq ratio of bucket 2. + * i.e. freq ratio of bucket 2 = + * freq ratio of bucket 1 - delta + * [28:24]-- +ve delta of number of active cores of bucket 3. + * [31:29]-- -ve delta of freq ratio of bucket 3. + * [36:32]-- +ve delta of number of active cores of bucket 4. + * [39:37]-- -ve delta of freq ratio of bucket 4. + * [44:40]-- +ve delta of number of active cores of bucket 5. + * [47:45]-- -ve delta of freq ratio of bucket 5. + * [52:48]-- +ve delta of number of active cores of bucket 6. + * [55:53]-- -ve delta of freq ratio of bucket 6. + * [60:56]-- +ve delta of number of active cores of bucket 7. + * [63:61]-- -ve delta of freq ratio of bucket 7. + */ + cores = msr & 0xFF; + ratio = (msr >> 8) && 0xFF; + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + + for (i = 16; i < 64; i = i + 8) { + delta_cores = (msr >> i) & 0x1F; + delta_ratio = (msr >> (i + 5)) && 0x7; + if (!delta_cores || !delta_ratio) + return; + cores = cores + delta_cores; + ratio = ratio - delta_ratio; + + /** -ve ratios will make successive ratio calculations + * negative. Hence return instead of carrying on. + */ + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + } +} + +static void dump_nhm_cst_cfg(void) { unsigned long long msr; - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) @@ -1381,12 +1447,41 @@ int parse_int_file(const char *fmt, ...) } /* - * cpu_is_first_sibling_in_core(cpu) - * return 1 if given CPU is 1st HT sibling in the core + * get_cpu_position_in_core(cpu) + * return the position of the CPU among its HT siblings in the core + * return -1 if the sibling is not in list */ -int cpu_is_first_sibling_in_core(int cpu) +int get_cpu_position_in_core(int cpu) { - return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); + char path[64]; + FILE *filep; + int this_cpu; + char character; + int i; + + sprintf(path, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", + cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + + for (i = 0; i < topo.num_threads_per_core; i++) { + fscanf(filep, "%d", &this_cpu); + if (this_cpu == cpu) { + fclose(filep); + return i; + } + + /* Account for no separator after last thread*/ + if (i != (topo.num_threads_per_core - 1)) + fscanf(filep, "%c", &character); + } + + fclose(filep); + return -1; } /* @@ -1412,25 +1507,31 @@ int get_num_ht_siblings(int cpu) { char path[80]; FILE *filep; - int sib1, sib2; - int matches; + int sib1; + int matches = 0; char character; + char str[100]; + char *ch; sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); filep = fopen_or_die(path, "r"); + /* * file format: - * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) - * otherwinse 1 sibling (self). + * A ',' separated or '-' separated set of numbers + * (eg 1-2 or 1,3,4,5) */ - matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); + fscanf(filep, "%d%c\n", &sib1, &character); + fseek(filep, 0, SEEK_SET); + fgets(str, 100, filep); + ch = strchr(str, character); + while (ch != NULL) { + matches++; + ch = strchr(ch+1, character); + } fclose(filep); - - if (matches == 3) - return 2; - else - return 1; + return matches+1; } /* @@ -1594,8 +1695,10 @@ restart: void check_dev_msr() { struct stat sb; + char pathname[32]; - if (stat("/dev/cpu/0/msr", &sb)) + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) if (system("/sbin/modprobe msr > /dev/null 2>&1")) err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } @@ -1608,6 +1711,7 @@ void check_permissions() cap_user_data_t cap_data = &cap_data_data; extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); int do_exit = 0; + char pathname[32]; /* check for CAP_SYS_RAWIO */ cap_header->pid = getpid(); @@ -1622,7 +1726,8 @@ void check_permissions() } /* test file permissions */ - if (euidaccess("/dev/cpu/0/msr", R_OK)) { + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (euidaccess(pathname, R_OK)) { do_exit++; warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); } @@ -1704,7 +1809,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) default: return 0; } - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; @@ -1753,6 +1858,21 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) } } +int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x57: /* Knights Landing */ + return 1; + default: + return 0; + } +} static void dump_cstate_pstate_config_info(family, model) { @@ -1770,6 +1890,9 @@ dump_cstate_pstate_config_info(family, model) if (has_nhm_turbo_ratio_limit(family, model)) dump_nhm_turbo_ratio_limits(); + if (has_knl_turbo_ratio_limit(family, model)) + dump_knl_turbo_ratio_limits(); + dump_nhm_cst_cfg(); } @@ -1801,7 +1924,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) return 0; - switch (msr & 0x7) { + switch (msr & 0xF) { case ENERGY_PERF_BIAS_PERFORMANCE: epb_string = "performance"; break; @@ -1925,7 +2048,7 @@ double get_tdp(model) unsigned long long msr; if (do_rapl & RAPL_PKG_POWER_INFO) - if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; switch (model) { @@ -1950,6 +2073,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x57: /* KNL */ return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); @@ -1991,6 +2115,7 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x57: /* KNL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x2D: @@ -2006,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model) } /* units on package 0, verify later other packages match */ - if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) + if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); @@ -2331,6 +2456,17 @@ int is_slm(unsigned int family, unsigned int model) return 0; } +int is_knl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + switch (model) { + case 0x57: /* KNL */ + return 1; + } + return 0; +} + #define SLM_BCLK_FREQS 5 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; @@ -2340,7 +2476,7 @@ double slm_bclk(void) unsigned int i; double freq; - if (get_msr(0, MSR_FSB_FREQ, &msr)) + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) fprintf(stderr, "SLM BCLK: unknown\n"); i = msr & 0xf; @@ -2408,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!do_nhm_platform_info) goto guess; - if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) goto guess; target_c_local = (msr >> 16) & 0xFF; @@ -2541,6 +2677,7 @@ void process_cpuid() do_c8_c9_c10 = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); + do_knl_cstates = is_knl(family, model); bclk = discover_bclk(family, model); rapl_probe(family, model); @@ -2755,13 +2892,9 @@ int initialize_counters(int cpu_id) my_package_id = get_physical_package_id(cpu_id); my_core_id = get_core_id(cpu_id); - - if (cpu_is_first_sibling_in_core(cpu_id)) { - my_thread_id = 0; + my_thread_id = get_cpu_position_in_core(cpu_id); + if (!my_thread_id) topo.num_cores++; - } else { - my_thread_id = 1; - } init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); @@ -2785,13 +2918,24 @@ void setup_all_buffers(void) for_all_proc_cpus(initialize_counters); } +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); + + if (debug > 1) + fprintf(stderr, "base_cpu = %d\n", base_cpu); +} + void turbostat_init() { + setup_all_buffers(); + set_base_cpu(); check_dev_msr(); check_permissions(); process_cpuid(); - setup_all_buffers(); if (debug) for_all_cpus(print_epb, ODD_COUNTERS); @@ -2870,7 +3014,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.5 2 Apr, 2015" + fprintf(stderr, "turbostat version 4.7 27-May, 2015" " - Len Brown <lenb@kernel.org>\n"); } diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5bdb781163d1..59d364aef1a8 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := sigreturn single_step_syscall +TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32) BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) @@ -55,3 +55,6 @@ warn_32bit_failure: echo " yum install glibc-devel.*i686"; \ exit 0; endif + +# Some tests have additional dependencies. +sysret_ss_attrs_64: thunks.S diff --git a/tools/testing/selftests/x86/sysret_ss_attrs.c b/tools/testing/selftests/x86/sysret_ss_attrs.c new file mode 100644 index 000000000000..ce42d5a64009 --- /dev/null +++ b/tools/testing/selftests/x86/sysret_ss_attrs.c @@ -0,0 +1,112 @@ +/* + * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * On AMD CPUs, SYSRET can return with a valid SS descriptor with with + * the hidden attributes set to an unusable state. Make sure the kernel + * doesn't let this happen. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <err.h> +#include <stddef.h> +#include <stdbool.h> +#include <pthread.h> + +static void *threadproc(void *ctx) +{ + /* + * Do our best to cause sleeps on this CPU to exit the kernel and + * re-enter with SS = 0. + */ + while (true) + ; + + return NULL; +} + +#ifdef __x86_64__ +extern unsigned long call32_from_64(void *stack, void (*function)(void)); + +asm (".pushsection .text\n\t" + ".code32\n\t" + "test_ss:\n\t" + "pushl $0\n\t" + "popl %eax\n\t" + "ret\n\t" + ".code64"); +extern void test_ss(void); +#endif + +int main() +{ + /* + * Start a busy-looping thread on the same CPU we're on. + * For simplicity, just stick everything to CPU 0. This will + * fail in some containers, but that's probably okay. + */ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + printf("[WARN]\tsched_setaffinity failed\n"); + + pthread_t thread; + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + +#ifdef __x86_64__ + unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (stack32 == MAP_FAILED) + err(1, "mmap"); +#endif + + printf("[RUN]\tSyscalls followed by SS validation\n"); + + for (int i = 0; i < 1000; i++) { + /* + * Go to sleep and return using sysret (if we're 64-bit + * or we're 32-bit on AMD on a 64-bit kernel). On AMD CPUs, + * SYSRET doesn't fix up the cached SS descriptor, so the + * kernel needs some kind of workaround to make sure that we + * end the system call with a valid stack segment. This + * can be a confusing failure because the SS *selector* + * is the same regardless. + */ + usleep(2); + +#ifdef __x86_64__ + /* + * On 32-bit, just doing a syscall through glibc is enough + * to cause a crash if our cached SS descriptor is invalid. + * On 64-bit, it's not, so try extra hard. + */ + call32_from_64(stack32 + 4088, test_ss); +#endif + } + + printf("[OK]\tWe survived\n"); + +#ifdef __x86_64__ + munmap(stack32, 4096); +#endif + + return 0; +} diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S new file mode 100644 index 000000000000..ce8a995bbb17 --- /dev/null +++ b/tools/testing/selftests/x86/thunks.S @@ -0,0 +1,67 @@ +/* + * thunks.S - assembly helpers for mixed-bitness code + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * These are little helpers that make it easier to switch bitness on + * the fly. + */ + + .text + + .global call32_from_64 + .type call32_from_64, @function +call32_from_64: + // rdi: stack to use + // esi: function to call + + // Save registers + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + + // Switch stacks + mov %rsp,(%rdi) + mov %rdi,%rsp + + // Switch to compatibility mode + pushq $0x23 /* USER32_CS */ + pushq $1f + lretq + +1: + .code32 + // Call the function + call *%esi + // Switch back to long mode + jmp $0x33,$1f + .code64 + +1: + // Restore the stack + mov (%rsp),%rsp + + // Restore registers + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + + ret + +.size call32_from_64, .-call32_from_64 |