diff options
Diffstat (limited to 'include')
39 files changed, 1460 insertions, 956 deletions
diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index 5406a601185c..e694263445f7 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -103,7 +103,6 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); - flush_write_buffers(); } static inline void dma_sync_single_for_device(struct device *dev, @@ -116,7 +115,6 @@ static inline void dma_sync_single_for_device(struct device *dev, if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); - flush_write_buffers(); } static inline void dma_sync_single_range_for_cpu(struct device *dev, @@ -132,7 +130,6 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, ops->sync_single_range_for_cpu(dev, addr, offset, size, dir); debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); - flush_write_buffers(); } else dma_sync_single_for_cpu(dev, addr, size, dir); } @@ -150,7 +147,6 @@ static inline void dma_sync_single_range_for_device(struct device *dev, ops->sync_single_range_for_device(dev, addr, offset, size, dir); debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); - flush_write_buffers(); } else dma_sync_single_for_device(dev, addr, size, dir); } @@ -165,7 +161,6 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); - flush_write_buffers(); } static inline void @@ -179,7 +174,6 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); - flush_write_buffers(); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) diff --git a/include/linux/ata.h b/include/linux/ata.h index 9c75921f0c16..6299a259ed19 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -210,15 +210,25 @@ enum { ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */ ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ ATA_CMD_EDD = 0x90, /* execute device diagnostic */ + ATA_CMD_DOWNLOAD_MICRO = 0x92, + ATA_CMD_NOP = 0x00, ATA_CMD_FLUSH = 0xE7, ATA_CMD_FLUSH_EXT = 0xEA, ATA_CMD_ID_ATA = 0xEC, ATA_CMD_ID_ATAPI = 0xA1, + ATA_CMD_SERVICE = 0xA2, ATA_CMD_READ = 0xC8, ATA_CMD_READ_EXT = 0x25, + ATA_CMD_READ_QUEUED = 0x26, + ATA_CMD_READ_STREAM_EXT = 0x2B, + ATA_CMD_READ_STREAM_DMA_EXT = 0x2A, ATA_CMD_WRITE = 0xCA, ATA_CMD_WRITE_EXT = 0x35, + ATA_CMD_WRITE_QUEUED = 0x36, + ATA_CMD_WRITE_STREAM_EXT = 0x3B, + ATA_CMD_WRITE_STREAM_DMA_EXT = 0x3A, ATA_CMD_WRITE_FUA_EXT = 0x3D, + ATA_CMD_WRITE_QUEUED_FUA_EXT = 0x3E, ATA_CMD_FPDMA_READ = 0x60, ATA_CMD_FPDMA_WRITE = 0x61, ATA_CMD_PIO_READ = 0x20, @@ -235,6 +245,7 @@ enum { ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, ATA_CMD_VERIFY_EXT = 0x42, + ATA_CMD_WRITE_UNCORR_EXT = 0x45, ATA_CMD_STANDBYNOW1 = 0xE0, ATA_CMD_IDLEIMMEDIATE = 0xE1, ATA_CMD_SLEEP = 0xE6, @@ -243,15 +254,34 @@ enum { ATA_CMD_READ_NATIVE_MAX_EXT = 0x27, ATA_CMD_SET_MAX = 0xF9, ATA_CMD_SET_MAX_EXT = 0x37, - ATA_CMD_READ_LOG_EXT = 0x2f, + ATA_CMD_READ_LOG_EXT = 0x2F, + ATA_CMD_WRITE_LOG_EXT = 0x3F, + ATA_CMD_READ_LOG_DMA_EXT = 0x47, + ATA_CMD_WRITE_LOG_DMA_EXT = 0x57, + ATA_CMD_TRUSTED_RCV = 0x5C, + ATA_CMD_TRUSTED_RCV_DMA = 0x5D, + ATA_CMD_TRUSTED_SND = 0x5E, + ATA_CMD_TRUSTED_SND_DMA = 0x5F, ATA_CMD_PMP_READ = 0xE4, ATA_CMD_PMP_WRITE = 0xE8, ATA_CMD_CONF_OVERLAY = 0xB1, + ATA_CMD_SEC_SET_PASS = 0xF1, + ATA_CMD_SEC_UNLOCK = 0xF2, + ATA_CMD_SEC_ERASE_PREP = 0xF3, + ATA_CMD_SEC_ERASE_UNIT = 0xF4, ATA_CMD_SEC_FREEZE_LOCK = 0xF5, + ATA_CMD_SEC_DISABLE_PASS = 0xF6, + ATA_CMD_CONFIG_STREAM = 0x51, ATA_CMD_SMART = 0xB0, ATA_CMD_MEDIA_LOCK = 0xDE, ATA_CMD_MEDIA_UNLOCK = 0xDF, ATA_CMD_DSM = 0x06, + ATA_CMD_CHK_MED_CRD_TYP = 0xD1, + ATA_CMD_CFA_REQ_EXT_ERR = 0x03, + ATA_CMD_CFA_WRITE_NE = 0x38, + ATA_CMD_CFA_TRANS_SECT = 0x87, + ATA_CMD_CFA_ERASE = 0xC0, + ATA_CMD_CFA_WRITE_MULT_NE = 0xCD, /* marked obsolete in the ATA/ATAPI-7 spec */ ATA_CMD_RESTORE = 0x10, @@ -306,6 +336,7 @@ enum { /* SETFEATURE Sector counts for SATA features */ SATA_AN = 0x05, /* Asynchronous Notification */ SATA_DIPM = 0x03, /* Device Initiated Power Management */ + SATA_FPDMA_AA = 0x02, /* DMA Setup FIS Auto-Activate */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, @@ -525,6 +556,9 @@ static inline int ata_is_data(u8 prot) #define ata_id_has_atapi_AN(id) \ ( (((id)[76] != 0x0000) && ((id)[76] != 0xffff)) && \ ((id)[78] & (1 << 5)) ) +#define ata_id_has_fpdma_aa(id) \ + ( (((id)[76] != 0x0000) && ((id)[76] != 0xffff)) && \ + ((id)[78] & (1 << 2)) ) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4d668e05d458..47536197ffdd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -48,6 +48,15 @@ struct notifier_block; #ifdef CONFIG_SMP /* Need to know about CPUs going up/down? */ +#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) +#define cpu_notifier(fn, pri) { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = pri }; \ + register_cpu_notifier(&fn##_nb); \ +} +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); @@ -74,6 +83,8 @@ extern void cpu_maps_update_done(void); #else /* CONFIG_SMP */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; @@ -99,11 +110,7 @@ extern struct sysdev_class cpu_sysdev_class; extern void get_online_cpus(void); extern void put_online_cpus(void); -#define hotcpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb __cpuinitdata = \ - { .notifier_call = fn, .priority = pri }; \ - register_cpu_notifier(&fn##_nb); \ -} +#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) int cpu_down(unsigned int cpu); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 07dfd460d286..c0f6c3cd788c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -98,11 +98,6 @@ static inline int is_device_dma_capable(struct device *dev) return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; } -static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) -{ - return addr + size <= mask; -} - #ifdef CONFIG_HAS_DMA #include <asm/dma-mapping.h> #else diff --git a/include/linux/dmi.h b/include/linux/dmi.h index bb5489c82c99..a8a3e1ac281d 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -43,7 +43,7 @@ extern const char * dmi_get_system_info(int field); extern const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from); extern void dmi_scan_machine(void); -extern int dmi_get_year(int field); +extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp); extern int dmi_name_in_vendors(const char *str); extern int dmi_name_in_serial(const char *str); extern int dmi_available; @@ -58,7 +58,16 @@ static inline const char * dmi_get_system_info(int field) { return NULL; } static inline const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from) { return NULL; } static inline void dmi_scan_machine(void) { return; } -static inline int dmi_get_year(int year) { return 0; } +static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp) +{ + if (yearp) + *yearp = 0; + if (monthp) + *monthp = 0; + if (dayp) + *dayp = 0; + return false; +} static inline int dmi_name_in_vendors(const char *s) { return 0; } static inline int dmi_name_in_serial(const char *s) { return 0; } #define dmi_available 0 diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index a81170de7f6b..23f7179bf74e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -93,16 +93,22 @@ void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); struct ring_buffer_event * -trace_current_buffer_lock_reserve(int type, unsigned long len, +trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, + int type, unsigned long len, unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, +void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, +void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_current_buffer_discard_commit(struct ring_buffer_event *event); +void trace_current_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); +struct event_filter; + struct ftrace_event_call { struct list_head list; char *name; @@ -110,16 +116,18 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); + int (*regfunc)(void *); + void (*unregfunc)(void *); int id; int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); - int (*define_fields)(void); + int (*show_format)(struct ftrace_event_call *call, + struct trace_seq *s); + int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; - void *filter; + struct event_filter *filter; void *mod; + void *data; atomic_t profile_count; int (*profile_enable)(struct ftrace_event_call *); @@ -129,15 +137,25 @@ struct ftrace_event_call { #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 128 -extern int init_preds(struct ftrace_event_call *call); extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); -extern int filter_current_check_discard(struct ftrace_event_call *call, +extern int filter_current_check_discard(struct ring_buffer *buffer, + struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); -extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed); +enum { + FILTER_OTHER = 0, + FILTER_STATIC_STRING, + FILTER_DYN_STRING, + FILTER_PTR_STRING, +}; + +extern int trace_define_field(struct ftrace_event_call *call, + const char *type, const char *name, + int offset, int size, int is_signed, + int filter_type); +extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) @@ -162,11 +180,4 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#define __common_field(type, item, is_signed) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item), is_signed); \ - if (ret) \ - return ret; - #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8246c697863d..6d527ee82b2b 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -64,6 +64,12 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) +#ifndef PREEMPT_ACTIVE +#define PREEMPT_ACTIVE_BITS 1 +#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) +#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) +#endif + #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) #error PREEMPT_ACTIVE is too low! #endif @@ -132,7 +138,7 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif -#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) +#if defined(CONFIG_NO_HZ) extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); @@ -142,7 +148,7 @@ extern void rcu_nmi_exit(void); # define rcu_irq_exit() do { } while (0) # define rcu_nmi_enter() do { } while (0) # define rcu_nmi_exit() do { } while (0) -#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */ +#endif /* #if defined(CONFIG_NO_HZ) */ /* * It is safe to do non-atomic ops on ->hardirq_context, diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7fc01b13be43..9e7f2e8fc66e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -94,6 +94,16 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +#ifdef CONFIG_TREE_PREEMPT_RCU +#define INIT_TASK_RCU_PREEMPT(tsk) \ + .rcu_read_lock_nesting = 0, \ + .rcu_read_unlock_special = 0, \ + .rcu_blocked_node = NULL, \ + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), +#else +#define INIT_TASK_RCU_PREEMPT(tsk) +#endif + extern struct cred init_cred; #ifdef CONFIG_PERF_COUNTERS @@ -173,6 +183,7 @@ extern struct cred init_cred; INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7df1e9f30..1ac57e522a1f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -50,6 +50,9 @@ * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is * registered first in an shared interrupt is considered for * performance reasons) + * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. + * Used by threaded interrupts which need to keep the + * irq line disabled until the threaded handler has been run. */ #define IRQF_DISABLED 0x00000020 #define IRQF_SAMPLE_RANDOM 0x00000040 @@ -59,6 +62,7 @@ #define IRQF_PERCPU 0x00000400 #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 +#define IRQF_ONESHOT 0x00002000 /* * Bits used by threaded handlers: diff --git a/include/linux/irq.h b/include/linux/irq.h index cb2e77a3f7f7..ae9653dbcd78 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -69,6 +69,8 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ +#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ +#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) @@ -100,6 +102,9 @@ struct msi_desc; * @set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @set_wake: enable/disable power-management wake-on of an IRQ * + * @bus_lock: function to lock access to slow bus (i2c) chips + * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips + * * @release: release function solely used by UML * @typename: obsoleted by name, kept as migration helper */ @@ -123,6 +128,9 @@ struct irq_chip { int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); + void (*bus_lock)(unsigned int irq); + void (*bus_sync_unlock)(unsigned int irq); + /* Currently used only by UML, might disappear one day.*/ #ifdef CONFIG_IRQ_RELEASE_METHOD void (*release)(unsigned int irq, void *dev_id); @@ -220,13 +228,6 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); /* - * Migration helpers for obsolete names, they will go away: - */ -#define hw_interrupt_type irq_chip -#define no_irq_type no_irq_chip -typedef struct irq_desc irq_desc_t; - -/* * Pick up the arch-dependent methods: */ #include <asm/hw_irq.h> @@ -289,6 +290,7 @@ extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc); extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc); extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); +extern void handle_nested_irq(unsigned int irq); /* * Monolithic do_IRQ implementation. @@ -379,6 +381,8 @@ set_irq_chained_handler(unsigned int irq, __set_irq_handler(irq, handle, 1, NULL); } +extern void set_irq_nested_thread(unsigned int irq, int nest); + extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index ec87b212ff7d..7bf89bc8cbca 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -41,6 +41,12 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); ; \ else +#ifdef CONFIG_SMP +#define irq_node(irq) (irq_to_desc(irq)->node) +#else +#define irq_node(irq) 0 +#endif + #endif /* CONFIG_GENERIC_HARDIRQS */ #define for_each_irq_nr(irq) \ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d6320a3e8def..2b5b1e0899a8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -125,7 +125,7 @@ extern int _cond_resched(void); #endif #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP - void __might_sleep(char *file, int line); + void __might_sleep(char *file, int line, int preempt_offset); /** * might_sleep - annotation for functions that can sleep * @@ -137,8 +137,9 @@ extern int _cond_resched(void); * supposed to. */ # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) + do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) #else + static inline void __might_sleep(char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index e5b6e33c6571..76319bf03e37 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -143,7 +143,6 @@ enum { ATA_DFLAG_PIO = (1 << 12), /* device limited to PIO mode */ ATA_DFLAG_NCQ_OFF = (1 << 13), /* device limited to non-NCQ mode */ - ATA_DFLAG_SPUNDOWN = (1 << 14), /* XXX: for spindown_compat */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ @@ -190,6 +189,7 @@ enum { ATA_FLAG_NO_POWEROFF_SPINDOWN = (1 << 11), /* don't spindown before poweroff */ ATA_FLAG_NO_HIBERNATE_SPINDOWN = (1 << 12), /* don't spindown before hibernation */ ATA_FLAG_DEBUGMSG = (1 << 13), + ATA_FLAG_FPDMA_AA = (1 << 14), /* driver supports Auto-Activate */ ATA_FLAG_IGN_SIMPLEX = (1 << 15), /* ignore SIMPLEX */ ATA_FLAG_NO_IORDY = (1 << 16), /* controller lacks iordy */ ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ @@ -386,6 +386,7 @@ enum { ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ + ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b25d1b53df0d..9ccf0e286b2a 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -149,6 +149,12 @@ struct lock_list { struct lock_class *class; struct stack_trace trace; int distance; + + /* + * The parent field is used to implement breadth-first search, and the + * bit 0 is reused to indicate if the lock has been accessed in BFS. + */ + struct lock_list *parent; }; /* @@ -208,10 +214,12 @@ struct held_lock { * interrupt context: */ unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ - unsigned int trylock:1; + unsigned int trylock:1; /* 16 bits */ + unsigned int read:2; /* see lock_acquire() comment */ unsigned int check:2; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; + unsigned int references:11; /* 32 bits */ }; /* @@ -291,6 +299,10 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); +#define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) + +extern int lock_is_held(struct lockdep_map *lock); + extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); @@ -309,6 +321,8 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) +#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l)) + #else /* !LOCKDEP */ static inline void lockdep_off(void) @@ -353,6 +367,8 @@ struct lock_class_key { }; #define lockdep_depth(tsk) (0) +#define lockdep_assert_held(l) do { } while (0) + #endif /* !LOCKDEP */ #ifdef CONFIG_LOCK_STAT diff --git a/include/linux/module.h b/include/linux/module.h index 098bdb7bfacf..f8f92d015efe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,10 +17,12 @@ #include <linux/moduleparam.h> #include <linux/marker.h> #include <linux/tracepoint.h> -#include <asm/local.h> +#include <asm/local.h> #include <asm/module.h> +#include <trace/events/module.h> + /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) @@ -462,7 +464,10 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - local_inc(__module_ref_addr(module, get_cpu())); + unsigned int cpu = get_cpu(); + local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); put_cpu(); } } @@ -473,8 +478,11 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) + if (likely(module_is_live(module))) { local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); + } else ret = 0; put_cpu(); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 29af2d5df097..b752e807adde 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -28,8 +28,23 @@ static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } #endif -#ifndef trigger_all_cpu_backtrace -#define trigger_all_cpu_backtrace() do { } while (0) +/* + * Create trigger_all_cpu_backtrace() out of the arch-provided + * base function. Return whether such support was available, + * to allow calling code to fall back to some other mechanism: + */ +#ifdef arch_trigger_all_cpu_backtrace +static inline bool trigger_all_cpu_backtrace(void) +{ + arch_trigger_all_cpu_backtrace(); + + return true; +} +#else +static inline bool trigger_all_cpu_backtrace(void) +{ + return false; +} #endif #endif diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1d9518bc4c58..5171639ecf0f 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -67,6 +67,9 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optional. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; @@ -171,7 +174,6 @@ struct op_sample; struct op_entry { struct ring_buffer_event *event; struct op_sample *sample; - unsigned long irq_flags; unsigned long size; unsigned long *data; }; @@ -180,6 +182,7 @@ void oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, unsigned long pc, int code, int size); int oprofile_add_data(struct op_entry *entry, unsigned long val); +int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); #endif /* OPROFILE_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index aec3252afcf5..ed5d7501e181 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -132,7 +132,7 @@ static inline int page_cache_get_speculative(struct page *page) { VM_BUG_ON(in_interrupt()); -#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif @@ -170,7 +170,7 @@ static inline int page_cache_add_speculative(struct page *page, int count) { VM_BUG_ON(in_interrupt()); -#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 73b46b6b904f..c8fdcadce437 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -376,6 +376,9 @@ #define PCI_DEVICE_ID_ATI_IXP600_IDE 0x438c #define PCI_DEVICE_ID_ATI_IXP700_SATA 0x4390 #define PCI_DEVICE_ID_ATI_IXP700_IDE 0x439c +/* AMD SB Chipset */ +#define PCI_DEVICE_ID_AMD_SB900_IDE 0x780c +#define PCI_DEVICE_ID_AMD_SB900_SATA_IDE 0x7800 #define PCI_VENDOR_ID_VLSI 0x1004 #define PCI_DEVICE_ID_VLSI_82C592 0x0005 @@ -537,6 +540,7 @@ #define PCI_DEVICE_ID_AMD_8131_BRIDGE 0x7450 #define PCI_DEVICE_ID_AMD_8131_APIC 0x7451 #define PCI_DEVICE_ID_AMD_8132_BRIDGE 0x7458 +#define PCI_DEVICE_ID_AMD_CS5535_IDE 0x208F #define PCI_DEVICE_ID_AMD_CS5536_ISA 0x2090 #define PCI_DEVICE_ID_AMD_CS5536_FLASH 0x2091 #define PCI_DEVICE_ID_AMD_CS5536_AUDIO 0x2093 diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b53f7006cc4e..972f90d7a32f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -216,6 +216,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -415,6 +416,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -536,6 +540,7 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; + struct perf_counter *output; const struct pmu *pmu; enum perf_counter_active_state state; @@ -761,6 +766,8 @@ extern int sysctl_perf_counter_mlock; extern int sysctl_perf_counter_sample_rate; extern void perf_counter_init(void); +extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, + void *record, int entry_size); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h deleted file mode 100644 index bfd92e1e5d2c..000000000000 --- a/include/linux/rcuclassic.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (classic version) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2001 - * - * Author: Dipankar Sarma <dipankar@in.ibm.com> - * - * Based on the original work by Paul McKenney <paulmck@us.ibm.com> - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ - -#ifndef __LINUX_RCUCLASSIC_H -#define __LINUX_RCUCLASSIC_H - -#include <linux/cache.h> -#include <linux/spinlock.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/seqlock.h> - -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - long cur; /* Current batch number. */ - long completed; /* Number of the last completed batch */ - long pending; /* Number of the last pending batch */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - unsigned long gp_start; /* Time at which GP started in jiffies. */ - unsigned long jiffies_stall; - /* Time at which to check for CPU stalls. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - - int signaled; - - spinlock_t lock ____cacheline_internodealigned_in_smp; - DECLARE_BITMAP(cpumask, NR_CPUS); /* CPUs that need to switch for */ - /* current batch to proceed. */ -} ____cacheline_internodealigned_in_smp; - -/* Is batch a before batch b ? */ -static inline int rcu_batch_before(long a, long b) -{ - return (a - b) < 0; -} - -/* Is batch a after batch b ? */ -static inline int rcu_batch_after(long a, long b) -{ - return (a - b) > 0; -} - -/* Per-CPU data for Read-Copy UPdate. */ -struct rcu_data { - /* 1) quiescent state handling : */ - long quiescbatch; /* Batch # for grace period */ - int passed_quiesc; /* User-mode/idle loop etc. */ - int qs_pending; /* core waits for quiesc state */ - - /* 2) batch handling */ - /* - * if nxtlist is not NULL, then: - * batch: - * The batch # for the last entry of nxtlist - * [*nxttail[1], NULL = *nxttail[2]): - * Entries that batch # <= batch - * [*nxttail[0], *nxttail[1]): - * Entries that batch # <= batch - 1 - * [nxtlist, *nxttail[0]): - * Entries that batch # <= batch - 2 - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). - */ - long batch; - struct rcu_head *nxtlist; - struct rcu_head **nxttail[3]; - long qlen; /* # of queued callbacks */ - struct rcu_head *donelist; - struct rcu_head **donetail; - long blimit; /* Upper limit on a processed batch */ - int cpu; - struct rcu_head barrier; -}; - -/* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know - * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. - */ -extern void rcu_qsctr_inc(int cpu); -extern void rcu_bh_qsctr_inc(int cpu); - -extern int rcu_pending(int cpu); -extern int rcu_needs_cpu(int cpu); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif - -#define __rcu_read_lock() \ - do { \ - preempt_disable(); \ - __acquire(RCU); \ - rcu_read_acquire(); \ - } while (0) -#define __rcu_read_unlock() \ - do { \ - rcu_read_release(); \ - __release(RCU); \ - preempt_enable(); \ - } while (0) -#define __rcu_read_lock_bh() \ - do { \ - local_bh_disable(); \ - __acquire(RCU_BH); \ - rcu_read_acquire(); \ - } while (0) -#define __rcu_read_unlock_bh() \ - do { \ - rcu_read_release(); \ - __release(RCU_BH); \ - local_bh_enable(); \ - } while (0) - -#define __synchronize_sched() synchronize_rcu() - -#define call_rcu_sched(head, func) call_rcu(head, func) - -extern void __rcu_init(void); -#define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); - -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); - -#define rcu_enter_nohz() do { } while (0) -#define rcu_exit_nohz() do { } while (0) - -/* A context switch is a grace period for rcuclassic. */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1; -} - -#endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 15fbb3ca634d..95e0615f4d75 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -51,18 +51,26 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -/* Internal to kernel, but needed by rcupreempt.h. */ +/* Exported common interfaces */ +extern void synchronize_rcu(void); +extern void synchronize_rcu_bh(void); +extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); +extern void synchronize_sched_expedited(void); +extern int sched_expedited_torture_stats(char *page); + +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_scheduler_starting(void); +extern int rcu_needs_cpu(int cpu); extern int rcu_scheduler_active; -#if defined(CONFIG_CLASSIC_RCU) -#include <linux/rcuclassic.h> -#elif defined(CONFIG_TREE_RCU) +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include <linux/rcutree.h> -#elif defined(CONFIG_PREEMPT_RCU) -#include <linux/rcupreempt.h> #else #error "Unknown RCU implementation specified to kernel configuration" -#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ +#endif #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT @@ -70,6 +78,16 @@ extern int rcu_scheduler_active; (ptr)->next = NULL; (ptr)->func = NULL; \ } while (0) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * @@ -99,7 +117,12 @@ extern int rcu_scheduler_active; * * It is illegal to block while in an RCU read-side critical section. */ -#define rcu_read_lock() __rcu_read_lock() +static inline void rcu_read_lock(void) +{ + __rcu_read_lock(); + __acquire(RCU); + rcu_read_acquire(); +} /** * rcu_read_unlock - marks the end of an RCU read-side critical section. @@ -116,7 +139,12 @@ extern int rcu_scheduler_active; * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ -#define rcu_read_unlock() __rcu_read_unlock() +static inline void rcu_read_unlock(void) +{ + rcu_read_release(); + __release(RCU); + __rcu_read_unlock(); +} /** * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section @@ -129,14 +157,24 @@ extern int rcu_scheduler_active; * can use just rcu_read_lock(). * */ -#define rcu_read_lock_bh() __rcu_read_lock_bh() +static inline void rcu_read_lock_bh(void) +{ + __rcu_read_lock_bh(); + __acquire(RCU_BH); + rcu_read_acquire(); +} /* * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ -#define rcu_read_unlock_bh() __rcu_read_unlock_bh() +static inline void rcu_read_unlock_bh(void) +{ + rcu_read_release(); + __release(RCU_BH); + __rcu_read_unlock_bh(); +} /** * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section @@ -147,17 +185,34 @@ extern int rcu_scheduler_active; * - call_rcu_sched() and rcu_barrier_sched() * on the write-side to insure proper synchronization. */ -#define rcu_read_lock_sched() preempt_disable() -#define rcu_read_lock_sched_notrace() preempt_disable_notrace() +static inline void rcu_read_lock_sched(void) +{ + preempt_disable(); + __acquire(RCU_SCHED); + rcu_read_acquire(); +} +static inline notrace void rcu_read_lock_sched_notrace(void) +{ + preempt_disable_notrace(); + __acquire(RCU_SCHED); +} /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section * * See rcu_read_lock_sched for more information. */ -#define rcu_read_unlock_sched() preempt_enable() -#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() - +static inline void rcu_read_unlock_sched(void) +{ + rcu_read_release(); + __release(RCU_SCHED); + preempt_enable(); +} +static inline notrace void rcu_read_unlock_sched_notrace(void) +{ + __release(RCU_SCHED); + preempt_enable_notrace(); +} /** @@ -259,15 +314,4 @@ extern void call_rcu(struct rcu_head *head, extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head)); -/* Exported common interfaces */ -extern void synchronize_rcu(void); -extern void rcu_barrier(void); -extern void rcu_barrier_bh(void); -extern void rcu_barrier_sched(void); - -/* Internal to kernel */ -extern void rcu_init(void); -extern void rcu_scheduler_starting(void); -extern int rcu_needs_cpu(int cpu); - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h deleted file mode 100644 index fce522782ffa..000000000000 --- a/include/linux/rcupreempt.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (RT implementation) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Paul McKenney <paulmck@us.ibm.com> - * - * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ - -#ifndef __LINUX_RCUPREEMPT_H -#define __LINUX_RCUPREEMPT_H - -#include <linux/cache.h> -#include <linux/spinlock.h> -#include <linux/threads.h> -#include <linux/smp.h> -#include <linux/cpumask.h> -#include <linux/seqlock.h> - -extern void rcu_qsctr_inc(int cpu); -static inline void rcu_bh_qsctr_inc(int cpu) { } - -/* - * Someone might want to pass call_rcu_bh as a function pointer. - * So this needs to just be a rename and not a macro function. - * (no parentheses) - */ -#define call_rcu_bh call_rcu - -/** - * call_rcu_sched - Queue RCU callback for invocation after sched grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full - * synchronize_sched()-style grace period elapses, in other words after - * all currently executing preempt-disabled sections of code (including - * hardirq handlers, NMI handlers, and local_irq_save() blocks) have - * completed. - */ -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - -extern void __rcu_read_lock(void) __acquires(RCU); -extern void __rcu_read_unlock(void) __releases(RCU); -extern int rcu_pending(int cpu); -extern int rcu_needs_cpu(int cpu); - -#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } -#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } - -extern void __synchronize_sched(void); - -extern void __rcu_init(void); -extern void rcu_init_sched(void); -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); -extern long rcu_batches_completed(void); - -/* - * Return the number of RCU batches processed thus far. Useful for debug - * and statistic. The _bh variant is identifcal to straight RCU - */ -static inline long rcu_batches_completed_bh(void) -{ - return rcu_batches_completed(); -} - -#ifdef CONFIG_RCU_TRACE -struct rcupreempt_trace; -extern long *rcupreempt_flipctr(int cpu); -extern long rcupreempt_data_completed(void); -extern int rcupreempt_flip_flag(int cpu); -extern int rcupreempt_mb_flag(int cpu); -extern char *rcupreempt_try_flip_state_name(void); -extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); -#endif - -struct softirq_action; - -#ifdef CONFIG_NO_HZ -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); -#else -# define rcu_enter_nohz() do { } while (0) -# define rcu_exit_nohz() do { } while (0) -#endif - -/* - * A context switch is a grace period for rcupreempt synchronize_rcu() - * only during early boot, before the scheduler has been initialized. - * So, how the heck do we get a context switch? Well, if the caller - * invokes synchronize_rcu(), they are willing to accept a context - * switch, so we simply pretend that one happened. - * - * After boot, there might be a blocked or preempted task in an RCU - * read-side critical section, so we cannot then take the fastpath. - */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1 && !rcu_scheduler_active; -} - -#endif /* __LINUX_RCUPREEMPT_H */ diff --git a/include/linux/rcupreempt_trace.h b/include/linux/rcupreempt_trace.h deleted file mode 100644 index b99ae073192a..000000000000 --- a/include/linux/rcupreempt_trace.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (RT implementation) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Paul McKenney <paulmck@us.ibm.com> - * - * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of the Preemptible Read-Copy Update mechanism see - - * http://lwn.net/Articles/253651/ - */ - -#ifndef __LINUX_RCUPREEMPT_TRACE_H -#define __LINUX_RCUPREEMPT_TRACE_H - -#include <linux/types.h> -#include <linux/kernel.h> - -#include <asm/atomic.h> - -/* - * PREEMPT_RCU data structures. - */ - -struct rcupreempt_trace { - long next_length; - long next_add; - long wait_length; - long wait_add; - long done_length; - long done_add; - long done_remove; - atomic_t done_invoked; - long rcu_check_callbacks; - atomic_t rcu_try_flip_1; - atomic_t rcu_try_flip_e1; - long rcu_try_flip_i1; - long rcu_try_flip_ie1; - long rcu_try_flip_g1; - long rcu_try_flip_a1; - long rcu_try_flip_ae1; - long rcu_try_flip_a2; - long rcu_try_flip_z1; - long rcu_try_flip_ze1; - long rcu_try_flip_z2; - long rcu_try_flip_m1; - long rcu_try_flip_me1; - long rcu_try_flip_m2; -}; - -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(fn, arg) fn(arg); -#else -#define RCU_TRACE(fn, arg) -#endif - -extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace); - -#endif /* __LINUX_RCUPREEMPT_TRACE_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5a5153806c42..a89307717825 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,264 +30,57 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -#include <linux/cache.h> -#include <linux/spinlock.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/seqlock.h> +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); -/* - * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. - * In theory, it should be possible to add more levels straightforwardly. - * In practice, this has not been tested, so there is probably some - * bug somewhere. - */ -#define MAX_RCU_LVLS 3 -#define RCU_FANOUT (CONFIG_RCU_FANOUT) -#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) -#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) - -#if NR_CPUS <= RCU_FANOUT -# define NUM_RCU_LVLS 1 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (NR_CPUS) -# define NUM_RCU_LVL_2 0 -# define NUM_RCU_LVL_3 0 -#elif NR_CPUS <= RCU_FANOUT_SQ -# define NUM_RCU_LVLS 2 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) -# define NUM_RCU_LVL_2 (NR_CPUS) -# define NUM_RCU_LVL_3 0 -#elif NR_CPUS <= RCU_FANOUT_CUBE -# define NUM_RCU_LVLS 3 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) -# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) -# define NUM_RCU_LVL_3 NR_CPUS -#else -# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" -#endif /* #if (NR_CPUS) <= RCU_FANOUT */ - -#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) -#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) - -/* - * Dynticks per-CPU state. - */ -struct rcu_dynticks { - int dynticks_nesting; /* Track nesting level, sort of. */ - int dynticks; /* Even value for dynticks-idle, else odd. */ - int dynticks_nmi; /* Even value for either dynticks-idle or */ - /* not in nmi handler, else odd. So this */ - /* remains even for nmi from irq handler. */ -}; - -/* - * Definition for node within the RCU grace-period-detection hierarchy. - */ -struct rcu_node { - spinlock_t lock; - unsigned long qsmask; /* CPUs or groups that need to switch in */ - /* order for current grace period to proceed.*/ - unsigned long qsmaskinit; - /* Per-GP initialization for qsmask. */ - unsigned long grpmask; /* Mask to apply to parent qsmask. */ - int grplo; /* lowest-numbered CPU or group here. */ - int grphi; /* highest-numbered CPU or group here. */ - u8 grpnum; /* CPU/group number for next level up. */ - u8 level; /* root is at level 0. */ - struct rcu_node *parent; -} ____cacheline_internodealigned_in_smp; - -/* Index values for nxttail array in struct rcu_data. */ -#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ -#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ -#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ -#define RCU_NEXT_TAIL 3 -#define RCU_NEXT_SIZE 4 - -/* Per-CPU data for read-copy update. */ -struct rcu_data { - /* 1) quiescent-state and grace-period handling : */ - long completed; /* Track rsp->completed gp number */ - /* in order to detect GP end. */ - long gpnum; /* Highest gp number that this CPU */ - /* is aware of having started. */ - long passed_quiesc_completed; - /* Value of completed at time of qs. */ - bool passed_quiesc; /* User-mode/idle loop etc. */ - bool qs_pending; /* Core waits for quiesc state. */ - bool beenonline; /* CPU online at least once. */ - struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ - unsigned long grpmask; /* Mask to apply to leaf qsmask. */ - - /* 2) batch handling */ - /* - * If nxtlist is not NULL, it is partitioned as follows. - * Any of the partitions might be empty, in which case the - * pointer to that partition will be equal to the pointer for - * the following partition. When the list is empty, all of - * the nxttail elements point to nxtlist, which is NULL. - * - * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): - * Entries that might have arrived after current GP ended - * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): - * Entries known to have arrived before current GP ended - * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): - * Entries that batch # <= ->completed - 1: waiting for current GP - * [nxtlist, *nxttail[RCU_DONE_TAIL]): - * Entries that batch # <= ->completed - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). - */ - struct rcu_head *nxtlist; - struct rcu_head **nxttail[RCU_NEXT_SIZE]; - long qlen; /* # of queued callbacks */ - long blimit; /* Upper limit on a processed batch */ - -#ifdef CONFIG_NO_HZ - /* 3) dynticks interface. */ - struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ - int dynticks_snap; /* Per-GP tracking for dynticks. */ - int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ -#endif /* #ifdef CONFIG_NO_HZ */ - - /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ -#ifdef CONFIG_NO_HZ - unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ -#endif /* #ifdef CONFIG_NO_HZ */ - unsigned long offline_fqs; /* Kicked due to being offline. */ - unsigned long resched_ipi; /* Sent a resched IPI. */ - - /* 5) __rcu_pending() statistics. */ - long n_rcu_pending; /* rcu_pending() calls since boot. */ - long n_rp_qs_pending; - long n_rp_cb_ready; - long n_rp_cpu_needs_gp; - long n_rp_gp_completed; - long n_rp_gp_started; - long n_rp_need_fqs; - long n_rp_need_nothing; - - int cpu; -}; - -/* Values for signaled field in struct rcu_state. */ -#define RCU_GP_INIT 0 /* Grace period being initialized. */ -#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ -#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ -#ifdef CONFIG_NO_HZ -#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK -#else /* #ifdef CONFIG_NO_HZ */ -#define RCU_SIGNAL_INIT RCU_FORCE_QS -#endif /* #else #ifdef CONFIG_NO_HZ */ - -#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ -#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ - /* to take at least one */ - /* scheduling clock irq */ - /* before ratting on them. */ - -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/* - * RCU global state, including node hierarchy. This hierarchy is - * represented in "heap" form in a dense array. The root (first level) - * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second - * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), - * and the third level in ->node[m+1] and following (->node[m+1] referenced - * by ->level[2]). The number of levels is determined by the number of - * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" - * consisting of a single rcu_node. - */ -struct rcu_state { - struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ - struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ - u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ - u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ - struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ - - /* The following fields are guarded by the root rcu_node's lock. */ - - u8 signaled ____cacheline_internodealigned_in_smp; - /* Force QS state. */ - long gpnum; /* Current gp number. */ - long completed; /* # of last completed gp. */ - spinlock_t onofflock; /* exclude on/offline and */ - /* starting new GP. */ - spinlock_t fqslock; /* Only one task forcing */ - /* quiescent states. */ - unsigned long jiffies_force_qs; /* Time at which to invoke */ - /* force_quiescent_state(). */ - unsigned long n_force_qs; /* Number of calls to */ - /* force_quiescent_state(). */ - unsigned long n_force_qs_lh; /* ~Number of calls leaving */ - /* due to lock unavailable. */ - unsigned long n_force_qs_ngp; /* Number of calls leaving */ - /* due to no GP active. */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - unsigned long gp_start; /* Time at which GP started, */ - /* but in jiffies. */ - unsigned long jiffies_stall; /* Time at which to check */ - /* for CPU stalls. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#ifdef CONFIG_NO_HZ - long dynticks_completed; /* Value of completed @ snap. */ -#endif /* #ifdef CONFIG_NO_HZ */ -}; +extern int rcu_needs_cpu(int cpu); -extern void rcu_qsctr_inc(int cpu); -extern void rcu_bh_qsctr_inc(int cpu); +#ifdef CONFIG_TREE_PREEMPT_RCU -extern int rcu_pending(int cpu); -extern int rcu_needs_cpu(int cpu); +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +extern void exit_rcu(void); -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) { preempt_disable(); - __acquire(RCU); - rcu_read_acquire(); } + static inline void __rcu_read_unlock(void) { - rcu_read_release(); - __release(RCU); preempt_enable(); } + +static inline void exit_rcu(void) +{ +} + +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ + static inline void __rcu_read_lock_bh(void) { local_bh_disable(); - __acquire(RCU_BH); - rcu_read_acquire(); } static inline void __rcu_read_unlock_bh(void) { - rcu_read_release(); - __release(RCU_BH); local_bh_enable(); } #define __synchronize_sched() synchronize_rcu() -#define call_rcu_sched(head, func) call_rcu(head, func) +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); -static inline void rcu_init_sched(void) +static inline void synchronize_rcu_expedited(void) { + synchronize_sched_expedited(); +} + +static inline void synchronize_rcu_bh_expedited(void) +{ + synchronize_sched_expedited(); } extern void __rcu_init(void); @@ -296,6 +89,11 @@ extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); +extern long rcu_batches_completed_sched(void); + +static inline void rcu_init_sched(void) +{ +} #ifdef CONFIG_NO_HZ void rcu_enter_nohz(void); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 29f8599e6bea..5fcc31ed5771 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -75,20 +75,6 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) } /* - * ring_buffer_event_discard can discard any event in the ring buffer. - * it is up to the caller to protect against a reader from - * consuming it or a writer from wrapping and replacing it. - * - * No external protection is needed if this is called before - * the event is commited. But in that case it would be better to - * use ring_buffer_discard_commit. - * - * Note, if an event that has not been committed is discarded - * with ring_buffer_event_discard, it must still be committed. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event); - -/* * ring_buffer_discard_commit will remove an event that has not * ben committed yet. If this is used, then ring_buffer_unlock_commit * must not be called on the discarded event. This function @@ -154,8 +140,17 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer); void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_reset(struct ring_buffer *buffer); +#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, struct ring_buffer *buffer_b, int cpu); +#else +static inline int +ring_buffer_swap_cpu(struct ring_buffer *buffer_a, + struct ring_buffer *buffer_b, int cpu) +{ + return -ENODEV; +} +#endif int ring_buffer_empty(struct ring_buffer *buffer); int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); @@ -170,7 +165,6 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, diff --git a/include/linux/sched.h b/include/linux/sched.h index 9304027673b0..f3d74bd04d18 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -38,6 +38,8 @@ #define SCHED_BATCH 3 /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 +/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ +#define SCHED_RESET_ON_FORK 0x40000000 #ifdef __KERNEL__ @@ -796,18 +798,19 @@ enum cpu_idle_type { #define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE #ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ -#define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 4 /* Balance on exec */ -#define SD_BALANCE_FORK 8 /* Balance on fork, clone */ -#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */ -#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */ -#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */ -#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ -#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ -#define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ -#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ +#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ +#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ +#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ +#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ +#define SD_WAKE_IDLE 0x0010 /* Wake to idle CPU on task wakeup */ +#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ +#define SD_WAKE_BALANCE 0x0040 /* Perform balancing at task wakeup */ +#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ +#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */ +#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ +#define SD_WAKE_IDLE_FAR 0x0800 /* Gain latency sacrificing cache hit */ +#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ enum powersavings_balance_level { POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ @@ -827,7 +830,7 @@ static inline int sd_balance_for_mc_power(void) if (sched_smt_power_savings) return SD_POWERSAVINGS_BALANCE; - return 0; + return SD_PREFER_SIBLING; } static inline int sd_balance_for_package_power(void) @@ -835,7 +838,7 @@ static inline int sd_balance_for_package_power(void) if (sched_mc_power_savings | sched_smt_power_savings) return SD_POWERSAVINGS_BALANCE; - return 0; + return SD_PREFER_SIBLING; } /* @@ -857,15 +860,9 @@ struct sched_group { /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a - * single CPU. This is read only (except for setup, hotplug CPU). - * Note : Never change cpu_power without recompute its reciprocal + * single CPU. */ - unsigned int __cpu_power; - /* - * reciprocal value of cpu_power to avoid expensive divides - * (see include/linux/reciprocal_div.h) - */ - u32 reciprocal_cpu_power; + unsigned int cpu_power; /* * The CPUs this group covers. @@ -918,6 +915,7 @@ struct sched_domain { unsigned int newidle_idx; unsigned int wake_idx; unsigned int forkexec_idx; + unsigned int smt_gain; int flags; /* See SD_* */ enum sched_domain_level level; @@ -1045,7 +1043,6 @@ struct sched_class { struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); - int (*needs_post_schedule) (struct rq *this_rq); void (*post_schedule) (struct rq *this_rq); void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); @@ -1110,6 +1107,8 @@ struct sched_entity { u64 wait_max; u64 wait_count; u64 wait_sum; + u64 iowait_count; + u64 iowait_sum; u64 sleep_start; u64 sleep_max; @@ -1163,6 +1162,8 @@ struct sched_rt_entity { #endif }; +struct rcu_node; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1206,10 +1207,12 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_PREEMPT_RCU +#ifdef CONFIG_TREE_PREEMPT_RCU int rcu_read_lock_nesting; - int rcu_flipctr_idx; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ + char rcu_read_unlock_special; + struct rcu_node *rcu_blocked_node; + struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -1230,11 +1233,19 @@ struct task_struct { unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ + unsigned in_iowait:1; + + + /* Revert to default priority/policy when forking */ + unsigned sched_reset_on_fork:1; + pid_t pid; pid_t tgid; +#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; +#endif /* * pointers to (original) parent process, youngest child, younger sibling, @@ -1725,6 +1736,28 @@ extern cputime_t task_gtime(struct task_struct *p); #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +#ifdef CONFIG_TREE_PREEMPT_RCU + +#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */ + +static inline void rcu_copy_process(struct task_struct *p) +{ + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special = 0; + p->rcu_blocked_node = NULL; + INIT_LIST_HEAD(&p->rcu_node_entry); +} + +#else + +static inline void rcu_copy_process(struct task_struct *p) +{ +} + +#endif + #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); @@ -1814,11 +1847,12 @@ extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_shares_ratelimit; extern unsigned int sysctl_sched_shares_thresh; -#ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_child_runs_first; +#ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; int sched_nr_latency_handler(struct ctl_table *table, int write, @@ -2282,23 +2316,31 @@ static inline int need_resched(void) * cond_resched_softirq() will enable bhs before scheduling. */ extern int _cond_resched(void); -#ifdef CONFIG_PREEMPT_BKL -static inline int cond_resched(void) -{ - return 0; -} + +#define cond_resched() ({ \ + __might_sleep(__FILE__, __LINE__, 0); \ + _cond_resched(); \ +}) + +extern int __cond_resched_lock(spinlock_t *lock); + +#ifdef CONFIG_PREEMPT +#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET #else -static inline int cond_resched(void) -{ - return _cond_resched(); -} +#define PREEMPT_LOCK_OFFSET 0 #endif -extern int cond_resched_lock(spinlock_t * lock); -extern int cond_resched_softirq(void); -static inline int cond_resched_bkl(void) -{ - return _cond_resched(); -} + +#define cond_resched_lock(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ + __cond_resched_lock(lock); \ +}) + +extern int __cond_resched_softirq(void); + +#define cond_resched_softirq() ({ \ + __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ + __cond_resched_softirq(); \ +}) /* * Does a critical section need to be broken due to another diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 4be57ab03478..f0ca7a7a1757 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -143,15 +143,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } */ #define spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) -/* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: - */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -# include <linux/spinlock_api_smp.h> -#else -# include <linux/spinlock_api_up.h> -#endif - #ifdef CONFIG_DEBUG_SPINLOCK extern void _raw_spin_lock(spinlock_t *lock); #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) @@ -268,50 +259,16 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } #define spin_lock_irq(lock) _spin_lock_irq(lock) #define spin_lock_bh(lock) _spin_lock_bh(lock) - #define read_lock_irq(lock) _read_lock_irq(lock) #define read_lock_bh(lock) _read_lock_bh(lock) - #define write_lock_irq(lock) _write_lock_irq(lock) #define write_lock_bh(lock) _write_lock_bh(lock) - -/* - * We inline the unlock functions in the nondebug case: - */ -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ - !defined(CONFIG_SMP) -# define spin_unlock(lock) _spin_unlock(lock) -# define read_unlock(lock) _read_unlock(lock) -# define write_unlock(lock) _write_unlock(lock) -# define spin_unlock_irq(lock) _spin_unlock_irq(lock) -# define read_unlock_irq(lock) _read_unlock_irq(lock) -# define write_unlock_irq(lock) _write_unlock_irq(lock) -#else -# define spin_unlock(lock) \ - do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define read_unlock(lock) \ - do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define write_unlock(lock) \ - do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define spin_unlock_irq(lock) \ -do { \ - __raw_spin_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define read_unlock_irq(lock) \ -do { \ - __raw_read_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define write_unlock_irq(lock) \ -do { \ - __raw_write_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -#endif +#define spin_unlock(lock) _spin_unlock(lock) +#define read_unlock(lock) _read_unlock(lock) +#define write_unlock(lock) _write_unlock(lock) +#define spin_unlock_irq(lock) _spin_unlock_irq(lock) +#define read_unlock_irq(lock) _read_unlock_irq(lock) +#define write_unlock_irq(lock) _write_unlock_irq(lock) #define spin_unlock_irqrestore(lock, flags) \ do { \ @@ -380,4 +337,13 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); */ #define spin_can_lock(lock) (!spin_is_locked(lock)) +/* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: + */ +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +# include <linux/spinlock_api_smp.h> +#else +# include <linux/spinlock_api_up.h> +#endif + #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index d79845d034b5..7a7e18fc2415 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,4 +60,398 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +/* + * We inline the unlock functions in the nondebug case: + */ +#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) +#define __always_inline__spin_unlock +#define __always_inline__read_unlock +#define __always_inline__write_unlock +#define __always_inline__spin_unlock_irq +#define __always_inline__read_unlock_irq +#define __always_inline__write_unlock_irq +#endif + +#ifndef CONFIG_DEBUG_SPINLOCK +#ifndef CONFIG_GENERIC_LOCKBREAK + +#ifdef __always_inline__spin_lock +#define _spin_lock(lock) __spin_lock(lock) +#endif + +#ifdef __always_inline__read_lock +#define _read_lock(lock) __read_lock(lock) +#endif + +#ifdef __always_inline__write_lock +#define _write_lock(lock) __write_lock(lock) +#endif + +#ifdef __always_inline__spin_lock_bh +#define _spin_lock_bh(lock) __spin_lock_bh(lock) +#endif + +#ifdef __always_inline__read_lock_bh +#define _read_lock_bh(lock) __read_lock_bh(lock) +#endif + +#ifdef __always_inline__write_lock_bh +#define _write_lock_bh(lock) __write_lock_bh(lock) +#endif + +#ifdef __always_inline__spin_lock_irq +#define _spin_lock_irq(lock) __spin_lock_irq(lock) +#endif + +#ifdef __always_inline__read_lock_irq +#define _read_lock_irq(lock) __read_lock_irq(lock) +#endif + +#ifdef __always_inline__write_lock_irq +#define _write_lock_irq(lock) __write_lock_irq(lock) +#endif + +#ifdef __always_inline__spin_lock_irqsave +#define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) +#endif + +#ifdef __always_inline__read_lock_irqsave +#define _read_lock_irqsave(lock) __read_lock_irqsave(lock) +#endif + +#ifdef __always_inline__write_lock_irqsave +#define _write_lock_irqsave(lock) __write_lock_irqsave(lock) +#endif + +#endif /* !CONFIG_GENERIC_LOCKBREAK */ + +#ifdef __always_inline__spin_trylock +#define _spin_trylock(lock) __spin_trylock(lock) +#endif + +#ifdef __always_inline__read_trylock +#define _read_trylock(lock) __read_trylock(lock) +#endif + +#ifdef __always_inline__write_trylock +#define _write_trylock(lock) __write_trylock(lock) +#endif + +#ifdef __always_inline__spin_trylock_bh +#define _spin_trylock_bh(lock) __spin_trylock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock +#define _spin_unlock(lock) __spin_unlock(lock) +#endif + +#ifdef __always_inline__read_unlock +#define _read_unlock(lock) __read_unlock(lock) +#endif + +#ifdef __always_inline__write_unlock +#define _write_unlock(lock) __write_unlock(lock) +#endif + +#ifdef __always_inline__spin_unlock_bh +#define _spin_unlock_bh(lock) __spin_unlock_bh(lock) +#endif + +#ifdef __always_inline__read_unlock_bh +#define _read_unlock_bh(lock) __read_unlock_bh(lock) +#endif + +#ifdef __always_inline__write_unlock_bh +#define _write_unlock_bh(lock) __write_unlock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock_irq +#define _spin_unlock_irq(lock) __spin_unlock_irq(lock) +#endif + +#ifdef __always_inline__read_unlock_irq +#define _read_unlock_irq(lock) __read_unlock_irq(lock) +#endif + +#ifdef __always_inline__write_unlock_irq +#define _write_unlock_irq(lock) __write_unlock_irq(lock) +#endif + +#ifdef __always_inline__spin_unlock_irqrestore +#define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__read_unlock_irqrestore +#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__write_unlock_irqrestore +#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) +#endif + +#endif /* CONFIG_DEBUG_SPINLOCK */ + +static inline int __spin_trylock(spinlock_t *lock) +{ + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __read_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_read_trylock(lock)) { + rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __write_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_write_trylock(lock)) { + rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) + +static inline void __read_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __spin_lock_irqsave(spinlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + /* + * On lockdep we dont want the hand-coded irq-enable of + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +#ifdef CONFIG_LOCKDEP + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +#else + _raw_spin_lock_flags(lock, &flags); +#endif + return flags; +} + +static inline void __spin_lock_irq(spinlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __spin_lock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline unsigned long __read_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, + _raw_read_lock_flags, &flags); + return flags; +} + +static inline void __read_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline void __read_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __write_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, + _raw_write_lock_flags, &flags); + return flags; +} + +static inline void __write_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __write_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __spin_lock(spinlock_t *lock) +{ + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __write_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +#endif /* CONFIG_PREEMPT */ + +static inline void __spin_unlock(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable(); +} + +static inline void __write_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable(); +} + +static inline void __read_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable(); +} + +static inline void __spin_unlock_irqrestore(spinlock_t *lock, + unsigned long flags) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __spin_unlock_irq(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __spin_unlock_bh(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void __read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __read_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __read_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void __write_unlock_irqrestore(rwlock_t *lock, + unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __write_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __write_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline int __spin_trylock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + return 0; +} + #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index cb1a6631b8f4..73b1f1cec423 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -14,7 +14,6 @@ struct scatterlist; */ #define IO_TLB_SEGSIZE 128 - /* * log of the size of each IO TLB slab. The number of slabs is command line * controllable. @@ -24,16 +23,6 @@ struct scatterlist; extern void swiotlb_init(void); -extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); -extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); - -extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, - phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, - dma_addr_t address); - -extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); - extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80de7003d8c2..a8e37821cc60 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -64,6 +64,7 @@ struct perf_counter_attr; #include <linux/sem.h> #include <asm/siginfo.h> #include <asm/signal.h> +#include <linux/unistd.h> #include <linux/quota.h> #include <linux/key.h> #include <trace/syscall.h> @@ -97,6 +98,53 @@ struct perf_counter_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_EVENT_PROFILE +#define TRACE_SYS_ENTER_PROFILE(sname) \ +static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_enter_##sname.profile_count)) \ + ret = reg_prof_syscall_enter("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \ + unreg_prof_syscall_enter("sys"#sname); \ +} + +#define TRACE_SYS_EXIT_PROFILE(sname) \ +static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_exit_##sname.profile_count)) \ + ret = reg_prof_syscall_exit("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ +{ \ + if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \ + unreg_prof_syscall_exit("sys"#sname); \ +} + +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysenter_enable_##sname, \ + .profile_disable = prof_sysenter_disable_##sname, + +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysexit_enable_##sname, \ + .profile_disable = prof_sysexit_disable_##sname, +#else +#define TRACE_SYS_ENTER_PROFILE(sname) +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) +#define TRACE_SYS_EXIT_PROFILE(sname) +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) +#endif + #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a #define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) @@ -112,7 +160,81 @@ struct perf_counter_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) +#define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static struct ftrace_event_call event_enter_##sname; \ + struct trace_event enter_syscall_print_##sname = { \ + .trace = print_syscall_enter, \ + }; \ + static int init_enter_##sname(void) \ + { \ + int num, id; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + id = register_ftrace_event(&enter_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_enter_##sname.id = id; \ + set_syscall_enter_id(num, id); \ + INIT_LIST_HEAD(&event_enter_##sname.fields); \ + return 0; \ + } \ + TRACE_SYS_ENTER_PROFILE(sname); \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_enter_##sname = { \ + .name = "sys_enter"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_enter, \ + .raw_init = init_enter_##sname, \ + .show_format = syscall_enter_format, \ + .define_fields = syscall_enter_define_fields, \ + .regfunc = reg_event_syscall_enter, \ + .unregfunc = unreg_event_syscall_enter, \ + .data = "sys"#sname, \ + TRACE_SYS_ENTER_PROFILE_INIT(sname) \ + } + +#define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static struct ftrace_event_call event_exit_##sname; \ + struct trace_event exit_syscall_print_##sname = { \ + .trace = print_syscall_exit, \ + }; \ + static int init_exit_##sname(void) \ + { \ + int num, id; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + id = register_ftrace_event(&exit_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_exit_##sname.id = id; \ + set_syscall_exit_id(num, id); \ + INIT_LIST_HEAD(&event_exit_##sname.fields); \ + return 0; \ + } \ + TRACE_SYS_EXIT_PROFILE(sname); \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_exit_##sname = { \ + .name = "sys_exit"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_exit, \ + .raw_init = init_exit_##sname, \ + .show_format = syscall_exit_format, \ + .define_fields = syscall_exit_define_fields, \ + .regfunc = reg_event_syscall_exit, \ + .unregfunc = unreg_event_syscall_exit, \ + .data = "sys"#sname, \ + TRACE_SYS_EXIT_PROFILE_INIT(sname) \ + } + #define SYSCALL_METADATA(sname, nb) \ + SYSCALL_TRACE_ENTER_EVENT(sname); \ + SYSCALL_TRACE_EXIT_EVENT(sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ @@ -121,18 +243,23 @@ struct perf_counter_attr; .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ - } + .enter_event = &event_enter_##sname, \ + .exit_event = &event_exit_##sname, \ + }; #define SYSCALL_DEFINE0(sname) \ + SYSCALL_TRACE_ENTER_EVENT(_##sname); \ + SYSCALL_TRACE_EXIT_EVENT(_##sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ __syscall_meta_##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ + .enter_event = &event_enter__##sname, \ + .exit_event = &event_exit__##sname, \ }; \ asmlinkage long sys_##sname(void) - #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) #endif diff --git a/include/linux/topology.h b/include/linux/topology.h index 7402c1a27c4f..85e8cf7d393c 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -85,20 +85,29 @@ int arch_update_cpu_topology(void); #define ARCH_HAS_SCHED_WAKE_IDLE /* Common values for SMT siblings */ #ifndef SD_SIBLING_INIT -#define SD_SIBLING_INIT (struct sched_domain) { \ - .min_interval = 1, \ - .max_interval = 2, \ - .busy_factor = 64, \ - .imbalance_pct = 110, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_FORK \ - | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_BALANCE \ - | SD_SHARE_CPUPOWER, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ +#define SD_SIBLING_INIT (struct sched_domain) { \ + .min_interval = 1, \ + .max_interval = 2, \ + .busy_factor = 64, \ + .imbalance_pct = 110, \ + \ + .flags = 1*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_WAKE_IDLE \ + | 1*SD_WAKE_AFFINE \ + | 1*SD_WAKE_BALANCE \ + | 1*SD_SHARE_CPUPOWER \ + | 0*SD_POWERSAVINGS_BALANCE \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + | 0*SD_WAKE_IDLE_FAR \ + | 0*SD_PREFER_SIBLING \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .smt_gain = 1178, /* 15% */ \ } #endif #endif /* CONFIG_SCHED_SMT */ @@ -106,69 +115,94 @@ int arch_update_cpu_topology(void); #ifdef CONFIG_SCHED_MC /* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */ #ifndef SD_MC_INIT -#define SD_MC_INIT (struct sched_domain) { \ - .min_interval = 1, \ - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 2, \ - .wake_idx = 1, \ - .forkexec_idx = 1, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_FORK \ - | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_BALANCE \ - | SD_SHARE_PKG_RESOURCES\ - | sd_balance_for_mc_power()\ - | sd_power_saving_flags(),\ - .last_balance = jiffies, \ - .balance_interval = 1, \ +#define SD_MC_INIT (struct sched_domain) { \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 2, \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ + \ + .flags = 1*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 1*SD_WAKE_IDLE \ + | 1*SD_WAKE_AFFINE \ + | 1*SD_WAKE_BALANCE \ + | 0*SD_SHARE_CPUPOWER \ + | 1*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + | 0*SD_WAKE_IDLE_FAR \ + | sd_balance_for_mc_power() \ + | sd_power_saving_flags() \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ } #endif #endif /* CONFIG_SCHED_MC */ /* Common values for CPUs */ #ifndef SD_CPU_INIT -#define SD_CPU_INIT (struct sched_domain) { \ - .min_interval = 1, \ - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 2, \ - .idle_idx = 1, \ - .newidle_idx = 2, \ - .wake_idx = 1, \ - .forkexec_idx = 1, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_BALANCE_FORK \ - | SD_WAKE_AFFINE \ - | SD_WAKE_BALANCE \ - | sd_balance_for_package_power()\ - | sd_power_saving_flags(),\ - .last_balance = jiffies, \ - .balance_interval = 1, \ +#define SD_CPU_INIT (struct sched_domain) { \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ + \ + .flags = 1*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 1*SD_WAKE_IDLE \ + | 0*SD_WAKE_AFFINE \ + | 1*SD_WAKE_BALANCE \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + | 0*SD_WAKE_IDLE_FAR \ + | sd_balance_for_package_power() \ + | sd_power_saving_flags() \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ } #endif /* sched_domains SD_ALLNODES_INIT for NUMA machines */ -#define SD_ALLNODES_INIT (struct sched_domain) { \ - .min_interval = 64, \ - .max_interval = 64*num_online_cpus(), \ - .busy_factor = 128, \ - .imbalance_pct = 133, \ - .cache_nice_tries = 1, \ - .busy_idx = 3, \ - .idle_idx = 3, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_WAKE_AFFINE \ - | SD_SERIALIZE, \ - .last_balance = jiffies, \ - .balance_interval = 64, \ +#define SD_ALLNODES_INIT (struct sched_domain) { \ + .min_interval = 64, \ + .max_interval = 64*num_online_cpus(), \ + .busy_factor = 128, \ + .imbalance_pct = 133, \ + .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 3, \ + .flags = 1*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 0*SD_BALANCE_EXEC \ + | 0*SD_BALANCE_FORK \ + | 0*SD_WAKE_IDLE \ + | 1*SD_WAKE_AFFINE \ + | 0*SD_WAKE_BALANCE \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_POWERSAVINGS_BALANCE \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 1*SD_SERIALIZE \ + | 1*SD_WAKE_IDLE_FAR \ + | 0*SD_PREFER_SIBLING \ + , \ + .last_balance = jiffies, \ + .balance_interval = 64, \ } #ifdef CONFIG_NUMA diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b9dc4ca0246f..63a3f7a80580 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -23,6 +23,8 @@ struct tracepoint; struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ + void (*regfunc)(void); + void (*unregfunc)(void); void **funcs; } __attribute__((aligned(32))); /* * Aligned on 32 bytes because it is @@ -78,12 +80,16 @@ struct tracepoint { return tracepoint_probe_unregister(#name, (void *)probe);\ } -#define DEFINE_TRACE(name) \ + +#define DEFINE_TRACE_FN(name, reg, unreg) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"), aligned(32))) = \ - { __tpstrtab_##name, 0, NULL } + { __tpstrtab_##name, 0, reg, unreg, NULL } + +#define DEFINE_TRACE(name) \ + DEFINE_TRACE_FN(name, NULL, NULL); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name) @@ -108,6 +114,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DEFINE_TRACE_FN(name, reg, unreg) #define DEFINE_TRACE(name) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) @@ -158,6 +165,15 @@ static inline void tracepoint_synchronize_unregister(void) #define PARAMS(args...) args +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT outside the include file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ + #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: @@ -259,10 +275,15 @@ static inline void tracepoint_synchronize_unregister(void) * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in * /sys/kernel/debug/tracing/events/. + * + * A set of (un)registration functions can be passed to the variant + * TRACE_EVENT_FN to perform any (un)registration work. */ #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif +#define TRACE_EVENT_FN(name, proto, args, struct, \ + assign, print, reg, unreg) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif +#endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index f7a7ae1e8f90..2a4b3bf74033 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,6 +26,11 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -56,6 +61,8 @@ #include <trace/ftrace.h> #endif +#undef TRACE_EVENT +#undef TRACE_EVENT_FN #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/events/module.h b/include/trace/events/module.h new file mode 100644 index 000000000000..84160fb18478 --- /dev/null +++ b/include/trace/events/module.h @@ -0,0 +1,126 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM module + +#if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MODULE_H + +#include <linux/tracepoint.h> + +#ifdef CONFIG_MODULES + +struct module; + +#define show_module_flags(flags) __print_flags(flags, "", \ + { (1UL << TAINT_PROPRIETARY_MODULE), "P" }, \ + { (1UL << TAINT_FORCED_MODULE), "F" }, \ + { (1UL << TAINT_CRAP), "C" }) + +TRACE_EVENT(module_load, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __field( unsigned int, taints ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->taints = mod->taints; + __assign_str(name, mod->name); + ), + + TP_printk("%s %s", __get_str(name), show_module_flags(__entry->taints)) +); + +TRACE_EVENT(module_free, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __string( name, mod->name ) + ), + + TP_fast_assign( + __assign_str(name, mod->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(module_get, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_put, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_request, + + TP_PROTO(char *name, bool wait, unsigned long ip), + + TP_ARGS(name, wait, ip), + + TP_STRUCT__entry( + __field( bool, wait ) + __field( unsigned long, ip ) + __string( name, name ) + ), + + TP_fast_assign( + __entry->wait = wait; + __entry->ip = ip; + __assign_str(name, name); + ), + + TP_printk("%s wait=%d call_site=%pf", + __get_str(name), (int)__entry->wait, (void *)__entry->ip) +); + +#endif /* CONFIG_MODULES */ + +#endif /* _TRACE_MODULE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> + diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8949bb7eb082..b48f1ad7c946 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -94,6 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) + __field( int, cpu ) ), TP_fast_assign( @@ -101,11 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; + __entry->cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d", + TP_printk("task %s:%d [%d] success=%d [%03d]", __entry->comm, __entry->pid, __entry->prio, - __entry->success) + __entry->success, __entry->cpu) ); /* @@ -125,6 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) + __field( int, cpu ) ), TP_fast_assign( @@ -132,11 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; + __entry->cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d", + TP_printk("task %s:%d [%d] success=%d [%03d]", __entry->comm, __entry->pid, __entry->prio, - __entry->success) + __entry->success, __entry->cpu) ); /* @@ -340,6 +344,101 @@ TRACE_EVENT(sched_signal_send, __entry->sig, __entry->comm, __entry->pid) ); +/* + * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE + * adding sched_stat support to SCHED_FIFO/RR would be welcome. + */ + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +TRACE_EVENT(sched_stat_wait, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d wait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +TRACE_EVENT(sched_stat_sleep, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +TRACE_EVENT(sched_stat_iowait, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h new file mode 100644 index 000000000000..397dff2dbd5a --- /dev/null +++ b/include/trace/events/syscalls.h @@ -0,0 +1,70 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM syscalls + +#if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EVENTS_SYSCALLS_H + +#include <linux/tracepoint.h> + +#include <asm/ptrace.h> +#include <asm/syscall.h> + + +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + +extern void syscall_regfunc(void); +extern void syscall_unregfunc(void); + +TRACE_EVENT_FN(sys_enter, + + TP_PROTO(struct pt_regs *regs, long id), + + TP_ARGS(regs, id), + + TP_STRUCT__entry( + __field( long, id ) + __array( unsigned long, args, 6 ) + ), + + TP_fast_assign( + __entry->id = id; + syscall_get_arguments(current, regs, 0, 6, __entry->args); + ), + + TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", + __entry->id, + __entry->args[0], __entry->args[1], __entry->args[2], + __entry->args[3], __entry->args[4], __entry->args[5]), + + syscall_regfunc, syscall_unregfunc +); + +TRACE_EVENT_FN(sys_exit, + + TP_PROTO(struct pt_regs *regs, long ret), + + TP_ARGS(regs, ret), + + TP_STRUCT__entry( + __field( long, id ) + __field( long, ret ) + ), + + TP_fast_assign( + __entry->id = syscall_get_nr(current, regs); + __entry->ret = ret; + ), + + TP_printk("NR %ld = %ld", + __entry->id, __entry->ret), + + syscall_regfunc, syscall_unregfunc +); + +#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ + +#endif /* _TRACE_EVENTS_SYSCALLS_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> + diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index f64fbaae781a..308bafd93325 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -21,11 +21,14 @@ #undef __field #define __field(type, item) type item; +#undef __field_ext +#define __field_ext(type, item, filter_type) type item; + #undef __array #define __array(type, item, len) type item[len]; #undef __dynamic_array -#define __dynamic_array(type, item, len) unsigned short __data_loc_##item; +#define __dynamic_array(type, item, len) u32 __data_loc_##item; #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -42,6 +45,16 @@ }; \ static struct ftrace_event_call event_##name +#undef __cpparg +#define __cpparg(arg...) arg + +/* Callbacks are meaningless to ftrace. */ +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + TRACE_EVENT(name, __cpparg(proto), __cpparg(args), \ + __cpparg(tstruct), __cpparg(assign), __cpparg(print)) \ + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -51,23 +64,27 @@ * Include the following: * * struct ftrace_data_offsets_<call> { - * int <item1>; - * int <item2>; + * u32 <item1>; + * u32 <item2>; * [...] * }; * - * The __dynamic_array() macro will create each int <item>, this is + * The __dynamic_array() macro will create each u32 <item>, this is * to keep the offset of each array from the beginning of the event. + * The size of an array is also encoded, in the higher 16 bits of <item>. */ #undef __field -#define __field(type, item); +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) #undef __array #define __array(type, item, len) #undef __dynamic_array -#define __dynamic_array(type, item, len) int item; +#define __dynamic_array(type, item, len) u32 item; #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -109,6 +126,9 @@ if (!ret) \ return 0; +#undef __field_ext +#define __field_ext(type, item, filter_type) __field(type, item) + #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ @@ -120,7 +140,7 @@ #undef __dynamic_array #define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ @@ -150,7 +170,8 @@ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ @@ -210,7 +231,7 @@ ftrace_format_##call(struct trace_seq *s) \ #undef __get_dynamic_array #define __get_dynamic_array(field) \ - ((void *)__entry + __entry->__data_loc_##field) + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) #undef __get_str #define __get_str(field) (char *)__get_dynamic_array(field) @@ -263,28 +284,33 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef __field -#define __field(type, item) \ +#undef __field_ext +#define __field_ext(type, item, filter_type) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ + sizeof(field.item), \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; +#undef __field +#define __field(type, item) __field_ext(type, item, FILTER_OTHER) + #undef __array #define __array(type, item, len) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0); \ + sizeof(field.item), 0, FILTER_OTHER); \ if (ret) \ return ret; #undef __dynamic_array #define __dynamic_array(type, item, len) \ - ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\ - offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0); + ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0, \ + FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -292,17 +318,14 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ -ftrace_define_fields_##call(void) \ +ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ struct ftrace_raw_##call field; \ - struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(int, type, 1); \ - __common_field(unsigned char, flags, 0); \ - __common_field(unsigned char, preempt_count, 0); \ - __common_field(int, pid, 1); \ - __common_field(int, tgid, 1); \ + ret = trace_define_common_fields(event_call); \ + if (ret) \ + return ret; \ \ tstruct; \ \ @@ -321,6 +344,9 @@ ftrace_define_fields_##call(void) \ #undef __field #define __field(type, item) +#undef __field_ext +#define __field_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) @@ -328,6 +354,7 @@ ftrace_define_fields_##call(void) \ #define __dynamic_array(type, item, len) \ __data_offsets->item = __data_size + \ offsetof(typeof(*entry), __data); \ + __data_offsets->item |= (len * sizeof(type)) << 16; \ __data_size += (len) * sizeof(type); #undef __string @@ -433,13 +460,15 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * { * struct ring_buffer_event *event; * struct ftrace_raw_<call> *entry; <-- defined in stage 1 + * struct ring_buffer *buffer; * unsigned long irq_flags; * int pc; * * local_save_flags(irq_flags); * pc = preempt_count(); * - * event = trace_current_buffer_lock_reserve(event_<call>.id, + * event = trace_current_buffer_lock_reserve(&buffer, + * event_<call>.id, * sizeof(struct ftrace_raw_<call>), * irq_flags, pc); * if (!event) @@ -449,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * <assign>; <-- Here we assign the entries by the __field and * __array macros. * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); * } * * static int ftrace_raw_reg_event_<call>(void) @@ -541,6 +570,7 @@ static void ftrace_raw_event_##call(proto) \ struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ + struct ring_buffer *buffer; \ unsigned long irq_flags; \ int __data_size; \ int pc; \ @@ -550,7 +580,8 @@ static void ftrace_raw_event_##call(proto) \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ - event = trace_current_buffer_lock_reserve(event_##call.id, \ + event = trace_current_buffer_lock_reserve(&buffer, \ + event_##call.id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -562,11 +593,12 @@ static void ftrace_raw_event_##call(proto) \ \ { assign; } \ \ - if (!filter_current_check_discard(event_call, entry, event)) \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ + if (!filter_current_check_discard(buffer, event_call, entry, event)) \ + trace_nowake_buffer_unlock_commit(buffer, \ + event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void) \ +static int ftrace_raw_reg_event_##call(void *ptr) \ { \ int ret; \ \ @@ -577,7 +609,7 @@ static int ftrace_raw_reg_event_##call(void) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void) \ +static void ftrace_raw_unreg_event_##call(void *ptr) \ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -595,7 +627,6 @@ static int ftrace_raw_init_event_##call(void) \ return -ENODEV; \ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ return 0; \ } \ \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 8cfe515cbc47..5dc283ba5ae0 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -1,8 +1,13 @@ #ifndef _TRACE_SYSCALL_H #define _TRACE_SYSCALL_H +#include <linux/tracepoint.h> +#include <linux/unistd.h> +#include <linux/ftrace_event.h> + #include <asm/ptrace.h> + /* * A syscall entry in the ftrace syscalls array. * @@ -10,26 +15,49 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) + * @enter_id: associated ftrace enter event id + * @exit_id: associated ftrace exit event id + * @enter_event: associated syscall_enter trace event + * @exit_event: associated syscall_exit trace event */ struct syscall_metadata { const char *name; int nb_args; const char **types; const char **args; + int enter_id; + int exit_id; + + struct ftrace_event_call *enter_event; + struct ftrace_event_call *exit_event; }; #ifdef CONFIG_FTRACE_SYSCALLS -extern void arch_init_ftrace_syscalls(void); extern struct syscall_metadata *syscall_nr_to_meta(int nr); -extern void start_ftrace_syscalls(void); -extern void stop_ftrace_syscalls(void); -extern void ftrace_syscall_enter(struct pt_regs *regs); -extern void ftrace_syscall_exit(struct pt_regs *regs); -#else -static inline void start_ftrace_syscalls(void) { } -static inline void stop_ftrace_syscalls(void) { } -static inline void ftrace_syscall_enter(struct pt_regs *regs) { } -static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +extern int syscall_name_to_nr(char *name); +void set_syscall_enter_id(int num, int id); +void set_syscall_exit_id(int num, int id); +extern struct trace_event event_syscall_enter; +extern struct trace_event event_syscall_exit; +extern int reg_event_syscall_enter(void *ptr); +extern void unreg_event_syscall_enter(void *ptr); +extern int reg_event_syscall_exit(void *ptr); +extern void unreg_event_syscall_exit(void *ptr); +extern int syscall_enter_format(struct ftrace_event_call *call, + struct trace_seq *s); +extern int syscall_exit_format(struct ftrace_event_call *call, + struct trace_seq *s); +extern int syscall_enter_define_fields(struct ftrace_event_call *call); +extern int syscall_exit_define_fields(struct ftrace_event_call *call); +enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); +enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); +#endif +#ifdef CONFIG_EVENT_PROFILE +int reg_prof_syscall_enter(char *name); +void unreg_prof_syscall_enter(char *name); +int reg_prof_syscall_exit(char *name); +void unreg_prof_syscall_exit(char *name); + #endif #endif /* _TRACE_SYSCALL_H */ |